{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 262,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['河马', '蟒蛇', '老虎', '大象', '兔子', '熊猫', '狮子']"
      ]
     },
     "execution_count": 262,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    " Faker.choose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 264,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[47, 45, 99, 94, 120, 77, 132]"
      ]
     },
     "execution_count": 264,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Faker.values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 265,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pyecharts.charts.composite_charts.timeline.Timeline at 0x17610d623d0>"
      ]
     },
     "execution_count": 265,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Timeline()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 259,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\zjl\\\\Desktop\\\\Python数据分析\\\\期末项目\\\\AI_BD_媒介融合\\\\timeline_pie.html'"
      ]
     },
     "execution_count": 259,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Pie, Timeline\n",
    "from pyecharts.faker import Faker\n",
    "\n",
    "attr = Faker.choose()\n",
    "tl = Timeline()\n",
    "for i in range(2015, 2020):\n",
    "    pie = (\n",
    "        Pie()\n",
    "        .add(\n",
    "            \"商家A\",\n",
    "            [list(z) for z in zip(attr, Faker.values())],\n",
    "            rosetype=\"radius\",\n",
    "            radius=[\"30%\", \"55%\"],\n",
    "        )\n",
    "        .set_global_opts(title_opts=opts.TitleOpts(\"某商店{}年营业额\".format(i)))\n",
    "    )\n",
    "    tl.add(pie, \"{}年\".format(i))\n",
    "tl.render(\"timeline_pie.html\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "```\n",
    "数据收集：许智超，张洁琳\n",
    "query=' (SU='媒体融合' AND SU = '大数据') OR  (SU='媒体融合' AND SU = '人工智能') ' \n",
    "精确查找\n",
    "时间：2021.6.21\n",
    "主题：媒体融合+人工智能大数据\n",
    "```\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据准备与清洗"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import requests,requests_html\n",
    "import os\n",
    "from glob import glob \n",
    "from lxml import etree"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据准备（.xls）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['raw_data\\\\374_01.xls']"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path_data_src = \"raw_data\"\n",
    "fn_listA = glob(os.path.join(path_data_src,\"*.xls\"))\n",
    "fn_listA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'raw_data\\\\374_01.xls'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_listA =[fn for fn in fn_listA]\n",
    "df_listA[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 读取文件（.xls, .doc）\n",
    "def ReadDf(file,list_series):\n",
    "    '''xls的list_series=0，doc的list_series=1'''\n",
    "    with open(file, encoding=\"utf8\") as f:\n",
    "        content = \"\\n\".join(f.readlines())\n",
    "        df = pd.read_html(content)[list_series] \n",
    "        columns_name = df.columns.tolist()\n",
    "        print(\"columns原名称：\",columns_name)   \n",
    "        columns_news = df.loc[0].to_list()\n",
    "        print(\"columns新名称：\",columns_news)\n",
    "        df.columns=columns_news\n",
    "        df = df.iloc[1:]\n",
    "    return df "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "columns原名称： [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]\n",
      "columns新名称： ['RT', 'SR', 'A1', 'AD', 'T1', 'JF', 'YR', 'IS', 'vo', 'OP', 'K1', 'AB', 'SN', 'CN', 'LA', 'DS']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RT</th>\n",
       "      <th>SR</th>\n",
       "      <th>A1</th>\n",
       "      <th>AD</th>\n",
       "      <th>T1</th>\n",
       "      <th>JF</th>\n",
       "      <th>YR</th>\n",
       "      <th>IS</th>\n",
       "      <th>vo</th>\n",
       "      <th>OP</th>\n",
       "      <th>K1</th>\n",
       "      <th>AB</th>\n",
       "      <th>SN</th>\n",
       "      <th>CN</th>\n",
       "      <th>LA</th>\n",
       "      <th>DS</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>沈晖;</td>\n",
       "      <td>南通大学信息化中心;</td>\n",
       "      <td>数据融通：县级融媒体中心的数据利用与功能发挥</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021</td>\n",
       "      <td>11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>30-31+33</td>\n",
       "      <td>大数据;县级融媒体中心;媒体融合</td>\n",
       "      <td>在当前全媒体时代和媒体融合发展的大背景下,县级融媒体中心建设的重要性越来越得到重视,下一步需...</td>\n",
       "      <td>1009-9263</td>\n",
       "      <td>11-4574/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>黄楚新;许可;</td>\n",
       "      <td>中国社会科学院大学新闻传播学院;中国社会科学院新闻与传播研究所数字媒体研究室;</td>\n",
       "      <td>人工智能技术驱动传媒业发展的三个维度</td>\n",
       "      <td>现代出版</td>\n",
       "      <td>2021</td>\n",
       "      <td>03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43-48</td>\n",
       "      <td>人工智能技术;媒体智能化;内容生产;社会治理</td>\n",
       "      <td>人工智能技术在自身的发展变革中不断嵌入社会,并赋能媒体,对传媒业产生了多样化的影响。理解媒体...</td>\n",
       "      <td>2095-0330</td>\n",
       "      <td>11-5979/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>郭晓敏;</td>\n",
       "      <td>河北大学新闻传播学院;</td>\n",
       "      <td>人民日报融合新闻生产的特色</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021</td>\n",
       "      <td>10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>31-32</td>\n",
       "      <td>人民日报;融合新闻;“中央厨房”</td>\n",
       "      <td>全媒体时代,技术成为媒体发展的核心驱动。主流媒体必须在技术赋能下快速转变经营思维,构建融合新...</td>\n",
       "      <td>1009-9263</td>\n",
       "      <td>11-4574/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>刘峰;罗敦洲;</td>\n",
       "      <td>宿迁学院;江苏省宿迁市委宣传部;</td>\n",
       "      <td>智媒时代县级融媒建设发展的制约瓶颈与应对策略</td>\n",
       "      <td>出版发行研究</td>\n",
       "      <td>2021</td>\n",
       "      <td>05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>57-64</td>\n",
       "      <td>智媒时代;县级融媒;“五为”原则</td>\n",
       "      <td>推进智媒时代的县级融媒建设是党中央巩固基层思想舆论阵地的重大战略部署,是扩大基层媒体影响力的...</td>\n",
       "      <td>1001-9316</td>\n",
       "      <td>11-1537/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>寇京晶;朱磊;</td>\n",
       "      <td>中国青年报社产品研发运营部;中青在线;中国青年报;</td>\n",
       "      <td>中国青年报社融媒云厨技术的实践与思考</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021</td>\n",
       "      <td>09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43-45</td>\n",
       "      <td>融媒云厨;媒体融合;技术创新;应用场景;人工智能;研发路径</td>\n",
       "      <td>中国青年报社的媒体融合实践,是从\"互联网+\"物理重构到\"互联网×\"化学质变的过程,2019年...</td>\n",
       "      <td>1009-9263</td>\n",
       "      <td>11-4574/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>371</th>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>张炼;</td>\n",
       "      <td>暨南大学新闻与传播学院;</td>\n",
       "      <td>新媒体时代广告业的转型升级</td>\n",
       "      <td>青年记者</td>\n",
       "      <td>2014</td>\n",
       "      <td>17</td>\n",
       "      <td>NaN</td>\n",
       "      <td>71</td>\n",
       "      <td>广告业;新媒体广告;创意文化产业;新媒体时代;</td>\n",
       "      <td>在全球传媒产业转型升级的时代,广告行业也面临着转型的需求。研究新媒体时代广告业的转型升级,必...</td>\n",
       "      <td>1002-2759</td>\n",
       "      <td>37-1003/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>372</th>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>梁小庆;</td>\n",
       "      <td>中央广播电视大学音像出版社;</td>\n",
       "      <td>IT第三平台对开放教育资源建设的影响</td>\n",
       "      <td>中国远程教育</td>\n",
       "      <td>2014</td>\n",
       "      <td>03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>72-81</td>\n",
       "      <td>第三平台;开放教育;资源建设;泛在学习;媒体融合</td>\n",
       "      <td>信息和通信技术已经发展到以移动网络、云计算、大数据和社交网络技术为代表的第三平台阶段,这对社...</td>\n",
       "      <td>1009-458X</td>\n",
       "      <td>11-4089/G4</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373</th>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>史鹏程;</td>\n",
       "      <td>北京中科大洋科技发展股份有限公司新闻制作部;</td>\n",
       "      <td>全媒体融合生产业务和技术应用探讨</td>\n",
       "      <td>电视技术</td>\n",
       "      <td>2014</td>\n",
       "      <td>02</td>\n",
       "      <td>38</td>\n",
       "      <td>72-76+85</td>\n",
       "      <td>全媒体融合生产;3G/4G新闻直播;全媒体信息采集;大数据安全传输;云编辑;二维码</td>\n",
       "      <td>全媒体时代的新闻制播业务对节目数量、质量、内容及效率都提出了更高要求,但就目前的情况来看,全...</td>\n",
       "      <td>1002-8692</td>\n",
       "      <td>11-2123/TN</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>374</th>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>高宪春;解葳;</td>\n",
       "      <td>江苏师范大学传媒与影视学院;济宁学院;</td>\n",
       "      <td>媒体融合背景下视听媒体创新途径再分析</td>\n",
       "      <td>电视研究</td>\n",
       "      <td>2014</td>\n",
       "      <td>01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>62-65</td>\n",
       "      <td>媒体创新;社交化;移动化;微型化;媒介融合背景;广电媒体;媒体融合;</td>\n",
       "      <td>随着三网融合的推进,当下广电媒体的发展进入到关键实质性阶段。创新什么、如何创新,成为媒体融合...</td>\n",
       "      <td>1007-3930</td>\n",
       "      <td>11-3068/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>375</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>375 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  RT   SR       A1                                       AD  \\\n",
       "1    Journal Article    1      沈晖;                               南通大学信息化中心;   \n",
       "2    Journal Article    1  黄楚新;许可;  中国社会科学院大学新闻传播学院;中国社会科学院新闻与传播研究所数字媒体研究室;   \n",
       "3    Journal Article    1     郭晓敏;                              河北大学新闻传播学院;   \n",
       "4    Journal Article    1  刘峰;罗敦洲;                         宿迁学院;江苏省宿迁市委宣传部;   \n",
       "5    Journal Article    1  寇京晶;朱磊;                中国青年报社产品研发运营部;中青在线;中国青年报;   \n",
       "..               ...  ...      ...                                      ...   \n",
       "371  Journal Article    1      张炼;                             暨南大学新闻与传播学院;   \n",
       "372  Journal Article    1     梁小庆;                           中央广播电视大学音像出版社;   \n",
       "373  Journal Article    1     史鹏程;                   北京中科大洋科技发展股份有限公司新闻制作部;   \n",
       "374  Journal Article    1  高宪春;解葳;                      江苏师范大学传媒与影视学院;济宁学院;   \n",
       "375              NaN  NaN      NaN                                      NaN   \n",
       "\n",
       "                         T1      JF    YR   IS   vo        OP  \\\n",
       "1    数据融通：县级融媒体中心的数据利用与功能发挥      传媒  2021   11  NaN  30-31+33   \n",
       "2        人工智能技术驱动传媒业发展的三个维度    现代出版  2021   03  NaN     43-48   \n",
       "3             人民日报融合新闻生产的特色      传媒  2021   10  NaN     31-32   \n",
       "4    智媒时代县级融媒建设发展的制约瓶颈与应对策略  出版发行研究  2021   05  NaN     57-64   \n",
       "5        中国青年报社融媒云厨技术的实践与思考      传媒  2021   09  NaN     43-45   \n",
       "..                      ...     ...   ...  ...  ...       ...   \n",
       "371           新媒体时代广告业的转型升级    青年记者  2014   17  NaN        71   \n",
       "372      IT第三平台对开放教育资源建设的影响  中国远程教育  2014   03  NaN     72-81   \n",
       "373        全媒体融合生产业务和技术应用探讨    电视技术  2014   02   38  72-76+85   \n",
       "374      媒体融合背景下视听媒体创新途径再分析    电视研究  2014   01  NaN     62-65   \n",
       "375                     NaN     NaN   NaN  NaN  NaN       NaN   \n",
       "\n",
       "                                            K1  \\\n",
       "1                             大数据;县级融媒体中心;媒体融合   \n",
       "2                       人工智能技术;媒体智能化;内容生产;社会治理   \n",
       "3                             人民日报;融合新闻;“中央厨房”   \n",
       "4                             智媒时代;县级融媒;“五为”原则   \n",
       "5                融媒云厨;媒体融合;技术创新;应用场景;人工智能;研发路径   \n",
       "..                                         ...   \n",
       "371                    广告业;新媒体广告;创意文化产业;新媒体时代;   \n",
       "372                   第三平台;开放教育;资源建设;泛在学习;媒体融合   \n",
       "373  全媒体融合生产;3G/4G新闻直播;全媒体信息采集;大数据安全传输;云编辑;二维码   \n",
       "374         媒体创新;社交化;移动化;微型化;媒介融合背景;广电媒体;媒体融合;   \n",
       "375                                        NaN   \n",
       "\n",
       "                                                    AB         SN          CN  \\\n",
       "1    在当前全媒体时代和媒体融合发展的大背景下,县级融媒体中心建设的重要性越来越得到重视,下一步需...  1009-9263  11-4574/G2   \n",
       "2    人工智能技术在自身的发展变革中不断嵌入社会,并赋能媒体,对传媒业产生了多样化的影响。理解媒体...  2095-0330  11-5979/G2   \n",
       "3    全媒体时代,技术成为媒体发展的核心驱动。主流媒体必须在技术赋能下快速转变经营思维,构建融合新...  1009-9263  11-4574/G2   \n",
       "4    推进智媒时代的县级融媒建设是党中央巩固基层思想舆论阵地的重大战略部署,是扩大基层媒体影响力的...  1001-9316  11-1537/G2   \n",
       "5    中国青年报社的媒体融合实践,是从\"互联网+\"物理重构到\"互联网×\"化学质变的过程,2019年...  1009-9263  11-4574/G2   \n",
       "..                                                 ...        ...         ...   \n",
       "371  在全球传媒产业转型升级的时代,广告行业也面临着转型的需求。研究新媒体时代广告业的转型升级,必...  1002-2759  37-1003/G2   \n",
       "372  信息和通信技术已经发展到以移动网络、云计算、大数据和社交网络技术为代表的第三平台阶段,这对社...  1009-458X  11-4089/G4   \n",
       "373  全媒体时代的新闻制播业务对节目数量、质量、内容及效率都提出了更高要求,但就目前的情况来看,全...  1002-8692  11-2123/TN   \n",
       "374  随着三网融合的推进,当下广电媒体的发展进入到关键实质性阶段。创新什么、如何创新,成为媒体融合...  1007-3930  11-3068/G2   \n",
       "375                                                NaN        NaN         NaN   \n",
       "\n",
       "      LA    DS  \n",
       "1    中文;  CNKI  \n",
       "2    中文;  CNKI  \n",
       "3    中文;  CNKI  \n",
       "4    中文;  CNKI  \n",
       "5    中文;  CNKI  \n",
       "..   ...   ...  \n",
       "371  中文;  CNKI  \n",
       "372  中文;  CNKI  \n",
       "373  中文;  CNKI  \n",
       "374  中文;  CNKI  \n",
       "375  NaN   NaN  \n",
       "\n",
       "[375 rows x 16 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfA = ReadDf(df_listA[0],0)\n",
    "dfA"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据准备（.doc）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['raw_data\\\\374_01.doc']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path_data_src = \"raw_data\"\n",
    "fn_listB = glob(os.path.join(path_data_src,\"*.doc\"))\n",
    "fn_listB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'raw_data\\\\374_01.doc'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_listB =[fn for fn in fn_listB]\n",
    "df_listB[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "columns原名称： [0, 1, 2, 3, 4, 5, 6]\n",
      "columns新名称： ['序号', '题名', '(第一)作者/主编', '来源', '发表时间', '被引', '下载']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "      <th>题名</th>\n",
       "      <th>(第一)作者/主编</th>\n",
       "      <th>来源</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>数据融通：县级融媒体中心的数据利用与功能发挥</td>\n",
       "      <td>沈晖;</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021-06-10</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>人工智能技术驱动传媒业发展的三个维度</td>\n",
       "      <td>黄楚新;许可;</td>\n",
       "      <td>现代出版</td>\n",
       "      <td>2021-05-25</td>\n",
       "      <td>0</td>\n",
       "      <td>234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>人民日报融合新闻生产的特色</td>\n",
       "      <td>郭晓敏;</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021-05-25</td>\n",
       "      <td>0</td>\n",
       "      <td>214</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>智媒时代县级融媒建设发展的制约瓶颈与应对策略</td>\n",
       "      <td>刘峰;罗敦洲;</td>\n",
       "      <td>出版发行研究</td>\n",
       "      <td>2021-05-15</td>\n",
       "      <td>0</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>中国青年报社融媒云厨技术的实践与思考</td>\n",
       "      <td>寇京晶;朱磊;</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021-05-10</td>\n",
       "      <td>0</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>370</td>\n",
       "      <td>勿表面理解媒体融合</td>\n",
       "      <td>陈红玉;</td>\n",
       "      <td>青年记者</td>\n",
       "      <td>2014-08-30</td>\n",
       "      <td>8</td>\n",
       "      <td>101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>371</th>\n",
       "      <td>371</td>\n",
       "      <td>新媒体时代广告业的转型升级</td>\n",
       "      <td>张炼;</td>\n",
       "      <td>青年记者</td>\n",
       "      <td>2014-06-20</td>\n",
       "      <td>3</td>\n",
       "      <td>345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>372</th>\n",
       "      <td>372</td>\n",
       "      <td>IT第三平台对开放教育资源建设的影响</td>\n",
       "      <td>梁小庆;</td>\n",
       "      <td>中国远程教育</td>\n",
       "      <td>2014-03-06</td>\n",
       "      <td>3</td>\n",
       "      <td>240</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373</th>\n",
       "      <td>373</td>\n",
       "      <td>全媒体融合生产业务和技术应用探讨</td>\n",
       "      <td>史鹏程;</td>\n",
       "      <td>电视技术</td>\n",
       "      <td>2014-01-17</td>\n",
       "      <td>9</td>\n",
       "      <td>169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>374</th>\n",
       "      <td>374</td>\n",
       "      <td>媒体融合背景下视听媒体创新途径再分析</td>\n",
       "      <td>高宪春;解葳;</td>\n",
       "      <td>电视研究</td>\n",
       "      <td>2014-01-05</td>\n",
       "      <td>7</td>\n",
       "      <td>567</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>374 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      序号                      题名 (第一)作者/主编      来源        发表时间 被引   下载\n",
       "1      1  数据融通：县级融媒体中心的数据利用与功能发挥       沈晖;      传媒  2021-06-10  0    0\n",
       "2      2      人工智能技术驱动传媒业发展的三个维度   黄楚新;许可;    现代出版  2021-05-25  0  234\n",
       "3      3           人民日报融合新闻生产的特色      郭晓敏;      传媒  2021-05-25  0  214\n",
       "4      4  智媒时代县级融媒建设发展的制约瓶颈与应对策略   刘峰;罗敦洲;  出版发行研究  2021-05-15  0   34\n",
       "5      5      中国青年报社融媒云厨技术的实践与思考   寇京晶;朱磊;      传媒  2021-05-10  0   19\n",
       "..   ...                     ...       ...     ...         ... ..  ...\n",
       "370  370               勿表面理解媒体融合      陈红玉;    青年记者  2014-08-30  8  101\n",
       "371  371           新媒体时代广告业的转型升级       张炼;    青年记者  2014-06-20  3  345\n",
       "372  372      IT第三平台对开放教育资源建设的影响      梁小庆;  中国远程教育  2014-03-06  3  240\n",
       "373  373        全媒体融合生产业务和技术应用探讨      史鹏程;    电视技术  2014-01-17  9  169\n",
       "374  374      媒体融合背景下视听媒体创新途径再分析   高宪春;解葳;    电视研究  2014-01-05  7  567\n",
       "\n",
       "[374 rows x 7 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfB = ReadDf(df_listB[0],1)\n",
    "dfB "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  A+B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "除错 0\n",
      "检查数量 374\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>序号</th>\n",
       "      <th>题名</th>\n",
       "      <th>(第一)作者/主编</th>\n",
       "      <th>来源</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>RT</th>\n",
       "      <th>SR</th>\n",
       "      <th>...</th>\n",
       "      <th>YR</th>\n",
       "      <th>IS</th>\n",
       "      <th>vo</th>\n",
       "      <th>OP</th>\n",
       "      <th>K1</th>\n",
       "      <th>AB</th>\n",
       "      <th>SN</th>\n",
       "      <th>CN</th>\n",
       "      <th>LA</th>\n",
       "      <th>DS</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>122</td>\n",
       "      <td>122</td>\n",
       "      <td>高校党建新媒体传播的理论、方法与策略</td>\n",
       "      <td>卢迪;邱子欣;</td>\n",
       "      <td>中国编辑</td>\n",
       "      <td>2019-06-10</td>\n",
       "      <td>7</td>\n",
       "      <td>656</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2019</td>\n",
       "      <td>06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20-26</td>\n",
       "      <td>高校;党建;新媒体</td>\n",
       "      <td>近年来,党中央对党建信息化提出了一系列新思想、新观点、新要求,为全面提高党建信息化水平指明了...</td>\n",
       "      <td>1671-9220</td>\n",
       "      <td>11-4795/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>314</td>\n",
       "      <td>314</td>\n",
       "      <td>颠覆与重构:新闻融合传播的策略与路径</td>\n",
       "      <td>金莉萍;</td>\n",
       "      <td>现代传播(中国传媒大学学报)</td>\n",
       "      <td>2016-01-15</td>\n",
       "      <td>11</td>\n",
       "      <td>927</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2016</td>\n",
       "      <td>01</td>\n",
       "      <td>38</td>\n",
       "      <td>158-159</td>\n",
       "      <td>新闻融合;策略与路径;互联网思维;内容为王;媒体融合;编辑部;策划设计;</td>\n",
       "      <td>在移动互联网时代,新技术、大数据改变了人们接触、参与信息传播的方式,引发了传播技术和信息传播...</td>\n",
       "      <td>1007-8770</td>\n",
       "      <td>11-5363/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>162</td>\n",
       "      <td>162</td>\n",
       "      <td>题材选择、数据处理与视觉表达——四川日报“MORE数据新闻频道”新闻作品分析</td>\n",
       "      <td>文铭权;李朗;</td>\n",
       "      <td>新闻界</td>\n",
       "      <td>2018-11-10</td>\n",
       "      <td>3</td>\n",
       "      <td>457</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2018</td>\n",
       "      <td>11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>91-94</td>\n",
       "      <td>媒体融合;数据新闻</td>\n",
       "      <td>在媒体融合发展战略中,数据新闻是一种重要的实现形式。四川日报\"MORE数据新闻频道\"在题材选...</td>\n",
       "      <td>1007-2438</td>\n",
       "      <td>51-1046/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>208</td>\n",
       "      <td>208</td>\n",
       "      <td>面向媒体融合的出版企业内容运营策略——以RAYS平台为例</td>\n",
       "      <td>白立华;刘永坚;施其明;</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2018-01-25</td>\n",
       "      <td>11</td>\n",
       "      <td>291</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2018</td>\n",
       "      <td>02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>68-70</td>\n",
       "      <td>出版企业;媒体融合;内容运营;RAYS平台</td>\n",
       "      <td>在媒体融合中,优质内容资源的合理运营是传统出版企业实现转型升级的基础。但当前许多传统出版企业...</td>\n",
       "      <td>1009-9263</td>\n",
       "      <td>11-4574/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>111</td>\n",
       "      <td>111</td>\n",
       "      <td>闯过深水区:媒体融合要抓住三大关键问题</td>\n",
       "      <td>徐世平;</td>\n",
       "      <td>新闻与写作</td>\n",
       "      <td>2019-08-05</td>\n",
       "      <td>0</td>\n",
       "      <td>151</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>86-89</td>\n",
       "      <td>媒体融合;信息革命;内容建设;资本;5G</td>\n",
       "      <td>传统媒体在体制机制、政策措施、流程管理、人才技术等方面加快融合步伐,建立融合传播矩阵、打造融...</td>\n",
       "      <td>1002-2295</td>\n",
       "      <td>11-1109/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>2015年传媒业猜想</td>\n",
       "      <td>郭全中;</td>\n",
       "      <td>青年记者</td>\n",
       "      <td>2015-01-10</td>\n",
       "      <td>2</td>\n",
       "      <td>197</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2015</td>\n",
       "      <td>01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11-12</td>\n",
       "      <td>传媒业;媒体融合;传统媒体广告;</td>\n",
       "      <td>互联网将成为影响2015年中国传媒业发展的关键因素,传媒业的很多方面都会打上互联网的烙印。2...</td>\n",
       "      <td>1002-2759</td>\n",
       "      <td>37-1003/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>353</td>\n",
       "      <td>353</td>\n",
       "      <td>2014年网络新媒体研究新触点及走向</td>\n",
       "      <td>孟威;</td>\n",
       "      <td>当代传播</td>\n",
       "      <td>2015-03-15</td>\n",
       "      <td>4</td>\n",
       "      <td>732</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2015</td>\n",
       "      <td>02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4-7+13</td>\n",
       "      <td>媒体融合;手机传播;“微”传播;大数据;新媒体</td>\n",
       "      <td>近两年,中国期刊网收录新媒体研究相关文献比起以往又有明显增加。文献在发表期刊和学科基础上虽有...</td>\n",
       "      <td>1009-5322</td>\n",
       "      <td>65-1201/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>371</th>\n",
       "      <td>361</td>\n",
       "      <td>361</td>\n",
       "      <td>2014年中国新媒体传播研究综述</td>\n",
       "      <td>付玉辉;</td>\n",
       "      <td>国际新闻界</td>\n",
       "      <td>2015-01-23</td>\n",
       "      <td>26</td>\n",
       "      <td>2993</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2015</td>\n",
       "      <td>01</td>\n",
       "      <td>37</td>\n",
       "      <td>35-46</td>\n",
       "      <td>新媒体;4G;媒体融合;互联网思维;产业互联网</td>\n",
       "      <td>本文对2014年中国新媒体传播研究的整体情况进行了梳理和归纳,认为这年的中国新媒体传播研究进...</td>\n",
       "      <td>1002-5685</td>\n",
       "      <td>11-1523/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>372</th>\n",
       "      <td>358</td>\n",
       "      <td>358</td>\n",
       "      <td>2014年中国广播发展图景</td>\n",
       "      <td>孟伟;</td>\n",
       "      <td>中国广播电视学刊</td>\n",
       "      <td>2015-03-01</td>\n",
       "      <td>8</td>\n",
       "      <td>357</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2015</td>\n",
       "      <td>03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14-21</td>\n",
       "      <td>广播;聚合;移动互联网;媒体融合;大数据</td>\n",
       "      <td>2014年是我国传统广播深入探索媒体融合的一年。从媒体形态转型、产业模式转型,到管理体制和机...</td>\n",
       "      <td>1002-8552</td>\n",
       "      <td>11-1746/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373</th>\n",
       "      <td>271</td>\n",
       "      <td>271</td>\n",
       "      <td>2004—2016我国传媒经济学的研究进展(上)</td>\n",
       "      <td>吴信训;储靖伦;</td>\n",
       "      <td>新闻与写作</td>\n",
       "      <td>2017-01-05</td>\n",
       "      <td>6</td>\n",
       "      <td>896</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2017</td>\n",
       "      <td>01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36-41</td>\n",
       "      <td>近10年;传媒经济;发展研究</td>\n",
       "      <td>近10年来,随着新媒体信息科技发展以及政治经济环境的改变,在一定意义上可以说,我国传媒经济发...</td>\n",
       "      <td>1002-2295</td>\n",
       "      <td>11-1109/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>374 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index   序号                                      题名     (第一)作者/主编  \\\n",
       "0      122  122                      高校党建新媒体传播的理论、方法与策略       卢迪;邱子欣;   \n",
       "1      314  314                      颠覆与重构:新闻融合传播的策略与路径          金莉萍;   \n",
       "2      162  162  题材选择、数据处理与视觉表达——四川日报“MORE数据新闻频道”新闻作品分析       文铭权;李朗;   \n",
       "3      208  208            面向媒体融合的出版企业内容运营策略——以RAYS平台为例  白立华;刘永坚;施其明;   \n",
       "4      111  111                     闯过深水区:媒体融合要抓住三大关键问题          徐世平;   \n",
       "..     ...  ...                                     ...           ...   \n",
       "369    363  363                              2015年传媒业猜想          郭全中;   \n",
       "370    353  353                      2014年网络新媒体研究新触点及走向           孟威;   \n",
       "371    361  361                        2014年中国新媒体传播研究综述          付玉辉;   \n",
       "372    358  358                           2014年中国广播发展图景           孟伟;   \n",
       "373    271  271                2004—2016我国传媒经济学的研究进展(上)      吴信训;储靖伦;   \n",
       "\n",
       "                 来源        发表时间  被引    下载               RT SR  ...    YR  IS  \\\n",
       "0              中国编辑  2019-06-10   7   656  Journal Article  1  ...  2019  06   \n",
       "1    现代传播(中国传媒大学学报)  2016-01-15  11   927  Journal Article  1  ...  2016  01   \n",
       "2               新闻界  2018-11-10   3   457  Journal Article  1  ...  2018  11   \n",
       "3                传媒  2018-01-25  11   291  Journal Article  1  ...  2018  02   \n",
       "4             新闻与写作  2019-08-05   0   151  Journal Article  1  ...  2019  08   \n",
       "..              ...         ...  ..   ...              ... ..  ...   ...  ..   \n",
       "369            青年记者  2015-01-10   2   197  Journal Article  1  ...  2015  01   \n",
       "370            当代传播  2015-03-15   4   732  Journal Article  1  ...  2015  02   \n",
       "371           国际新闻界  2015-01-23  26  2993  Journal Article  1  ...  2015  01   \n",
       "372        中国广播电视学刊  2015-03-01   8   357  Journal Article  1  ...  2015  03   \n",
       "373           新闻与写作  2017-01-05   6   896  Journal Article  1  ...  2017  01   \n",
       "\n",
       "      vo       OP                                    K1  \\\n",
       "0    NaN    20-26                             高校;党建;新媒体   \n",
       "1     38  158-159  新闻融合;策略与路径;互联网思维;内容为王;媒体融合;编辑部;策划设计;   \n",
       "2    NaN    91-94                             媒体融合;数据新闻   \n",
       "3    NaN    68-70                 出版企业;媒体融合;内容运营;RAYS平台   \n",
       "4    NaN    86-89                  媒体融合;信息革命;内容建设;资本;5G   \n",
       "..   ...      ...                                   ...   \n",
       "369  NaN    11-12                      传媒业;媒体融合;传统媒体广告;   \n",
       "370  NaN   4-7+13               媒体融合;手机传播;“微”传播;大数据;新媒体   \n",
       "371   37    35-46               新媒体;4G;媒体融合;互联网思维;产业互联网   \n",
       "372  NaN    14-21                  广播;聚合;移动互联网;媒体融合;大数据   \n",
       "373  NaN    36-41                        近10年;传媒经济;发展研究   \n",
       "\n",
       "                                                    AB         SN          CN  \\\n",
       "0    近年来,党中央对党建信息化提出了一系列新思想、新观点、新要求,为全面提高党建信息化水平指明了...  1671-9220  11-4795/G2   \n",
       "1    在移动互联网时代,新技术、大数据改变了人们接触、参与信息传播的方式,引发了传播技术和信息传播...  1007-8770  11-5363/G2   \n",
       "2    在媒体融合发展战略中,数据新闻是一种重要的实现形式。四川日报\"MORE数据新闻频道\"在题材选...  1007-2438  51-1046/G2   \n",
       "3    在媒体融合中,优质内容资源的合理运营是传统出版企业实现转型升级的基础。但当前许多传统出版企业...  1009-9263  11-4574/G2   \n",
       "4    传统媒体在体制机制、政策措施、流程管理、人才技术等方面加快融合步伐,建立融合传播矩阵、打造融...  1002-2295  11-1109/G2   \n",
       "..                                                 ...        ...         ...   \n",
       "369  互联网将成为影响2015年中国传媒业发展的关键因素,传媒业的很多方面都会打上互联网的烙印。2...  1002-2759  37-1003/G2   \n",
       "370  近两年,中国期刊网收录新媒体研究相关文献比起以往又有明显增加。文献在发表期刊和学科基础上虽有...  1009-5322  65-1201/G2   \n",
       "371  本文对2014年中国新媒体传播研究的整体情况进行了梳理和归纳,认为这年的中国新媒体传播研究进...  1002-5685  11-1523/G2   \n",
       "372  2014年是我国传统广播深入探索媒体融合的一年。从媒体形态转型、产业模式转型,到管理体制和机...  1002-8552  11-1746/G2   \n",
       "373  近10年来,随着新媒体信息科技发展以及政治经济环境的改变,在一定意义上可以说,我国传媒经济发...  1002-2295  11-1109/G2   \n",
       "\n",
       "      LA    DS  \n",
       "0    中文;  CNKI  \n",
       "1    中文;  CNKI  \n",
       "2    中文;  CNKI  \n",
       "3    中文;  CNKI  \n",
       "4    中文;  CNKI  \n",
       "..   ...   ...  \n",
       "369  中文;  CNKI  \n",
       "370  中文;  CNKI  \n",
       "371  中文;  CNKI  \n",
       "372  中文;  CNKI  \n",
       "373  中文;  CNKI  \n",
       "\n",
       "[374 rows x 24 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# def add_file():\n",
    "A = dfA.sort_values(by=['T1','JF','A1', \"YR\"], ascending=False)\n",
    "B = dfB.sort_values(by=['题名','来源','(第一)作者/主编', \"发表时间\"], ascending=False)\n",
    "dfC = B.reset_index().join(A.reset_index().drop('index', axis=1))\n",
    "dfC\n",
    "diff = dfC.query(\"T1!=题名 or JF!=来源\")\n",
    "print('除错',len(diff))\n",
    "print('检查数量',len(dfC))\n",
    "dfC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfC.to_csv(\"datasets\\Combined_374.tsv\", sep='\\t', encoding=\"utf8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('C:\\\\Users\\\\zjl\\\\Desktop\\\\Python数据分析\\\\期末项目\\\\AI_BD_媒介融合\\\\datasets\\\\Combined_374.tsv', sep='\\t', encoding=\"utf8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['index', '序号', '题名', '(第一)作者/主编', '来源', '发表时间', '被引', '下载', 'RT', 'SR',\n",
       "       'A1', 'AD', 'T1', 'JF', 'YR', 'IS', 'vo', 'OP', 'K1', 'AB', 'SN', 'CN',\n",
       "       'LA', 'DS'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfC.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_to_x = data['YR'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_to_x = data['YR'].value_counts()\n",
    "\n",
    "x_data = data_to_x.values.tolist()\n",
    "\n",
    "\n",
    "data_by_time = data.sort_values(by=['YR'], ascending=False)\n",
    "y_data = data_by_time['YR'].drop_duplicates(keep='first',inplace=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "350    2021\n",
       "248    2021\n",
       "253    2021\n",
       "81     2021\n",
       "320    2021\n",
       "       ... \n",
       "169    2014\n",
       "268    2014\n",
       "178    2014\n",
       "45     2014\n",
       "100    2014\n",
       "Name: YR, Length: 374, dtype: int64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_by_time['YR']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2019    72\n",
       "2020    61\n",
       "2018    57\n",
       "2017    57\n",
       "2015    49\n",
       "2016    43\n",
       "2021    25\n",
       "2014    10\n",
       "Name: YR, dtype: int64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_to_x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = y_data.values.tolist()\n",
    "y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 年度趋势——折线图"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 总体趋势"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 数据准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 506,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 读取文件\n",
    "data = pd.read_csv('C:\\\\Users\\\\zjl\\\\Desktop\\\\Python数据分析\\\\期末项目\\\\AI_BD_媒介融合\\\\datasets\\\\Combined_374.tsv', sep='\\t', encoding=\"utf8\")\n",
    "\n",
    "## 获得y轴数据\n",
    "data_to_y = data['YR'].value_counts()\n",
    "y_data = data_to_y.values.tolist()\n",
    "y_data.sort(reverse=False) #从小到大排序\n",
    "\n",
    "## 获得x轴数据\n",
    "data_by_time = data.sort_values(by=['YR'], ascending=False)\n",
    "data_to_x = data_by_time['YR'].drop_duplicates(keep='first',inplace=False)\n",
    "x_data_int = data_to_x.values.tolist()\n",
    "x_data_int.sort(reverse=False) #依照y轴标准排序\n",
    "x_data = []\n",
    "for i in x_data_int:\n",
    "    j = str(i)+\"年\"\n",
    "    x_data.append(j)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 507,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[10, 25, 43, 49, 57, 57, 61, 72]"
      ]
     },
     "execution_count": 507,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 508,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['2014年', '2015年', '2016年', '2017年', '2018年', '2019年', '2020年', '2021年']"
      ]
     },
     "execution_count": 508,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### pyecharts "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 515,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\zjl\\\\Desktop\\\\Python数据分析\\\\期末项目\\\\AI_BD_媒介融合\\\\Trend_Line.html'"
      ]
     },
     "execution_count": 515,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pyecharts.options as opts\n",
    "from pyecharts.charts import Line\n",
    "(\n",
    "Line()\n",
    "        .set_global_opts(\n",
    "            tooltip_opts=opts.TooltipOpts(is_show=False),\n",
    "            xaxis_opts=opts.AxisOpts(type_=\"category\"),\n",
    "            yaxis_opts=opts.AxisOpts(\n",
    "                type_=\"value\",\n",
    "                axistick_opts=opts.AxisTickOpts(is_show=True),\n",
    "                splitline_opts=opts.SplitLineOpts(is_show=True),\n",
    "            ),\n",
    "        )\n",
    "        .add_xaxis(xaxis_data=x_data)\n",
    "        .add_yaxis(\n",
    "\n",
    "            series_name=\"发文数\",\n",
    "            y_axis=y_data,\n",
    "            symbol=\"emptyCircle\",\n",
    "            is_symbol_show=True,\n",
    "            label_opts=opts.LabelOpts(is_show=False),\n",
    "    )\n",
    "        .render(\"Trend_Line.html\")\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 关键字趋势"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 封装函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_year_ =  dfC\n",
    "df_to_keyword = dfC\n",
    "#     df_year = df_to_keyword\n",
    "df_year_.set_index(\"YR\", inplace=True)\n",
    "\n",
    "df_kw = df_year_.loc[\"2014\"]\n",
    "df_kw[\"K1\"].values\n",
    "\n",
    "\n",
    "# 拆分关键字\n",
    "a1 = [str(x).split(\";\") for x in df_to_keyword[\"K1\"].values]\n",
    "# 得到顺序的关键字列表\n",
    "all_kw = []\n",
    "for i in a1:\n",
    "    for j in i:\n",
    "        all_kw.append(j)\n",
    "# 去空值\n",
    "all_kw = [i for i in all_kw if i != '']\n",
    "# \n",
    "result = pd.value_counts(all_kw)\n",
    "kw_result = result.index.tolist()\n",
    "### \n",
    "kw_type = []\n",
    "for i in result.index:\n",
    "    kw_type.append(i)\n",
    "kw_accout= result.tolist()\n",
    "### \n",
    "list1 = []\n",
    "list2 = []\n",
    "for i in range(0,len(kw_accout)):\n",
    "    j1 = kw_result[i]\n",
    "    j2 = kw_accout[i]\n",
    "    list1.append([j1]) \n",
    "    list1.append([j2])\n",
    "    list2.append(list1[i:i+2])\n",
    "\n",
    "\n",
    "list3 = []\n",
    "for i in range(len(list2)):\n",
    "        if i %2 !=1:     \n",
    "            list3.append(list2[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[['媒体融合'], [227]],\n",
       " [['人工智能'], [61]],\n",
       " [['大数据'], [41]],\n",
       " [['媒体融合发展'], [39]],\n",
       " [['传统媒体'], [29]],\n",
       " [['新媒体'], [20]],\n",
       " [['5G'], [16]],\n",
       " [['传媒业'], [12]],\n",
       " [['短视频'], [12]],\n",
       " [['互联网思维'], [11]],\n",
       " [['媒体深度融合'], [11]],\n",
       " [['主流媒体'], [10]],\n",
       " [['智能媒体'], [10]],\n",
       " [['人工智能技术'], [10]],\n",
       " [['报业集团'], [10]],\n",
       " [['全媒体'], [9]],\n",
       " [['新闻客户端'], [9]],\n",
       " [['人民日报'], [9]],\n",
       " [['新华社'], [9]],\n",
       " [['全国两会'], [8]],\n",
       " [['新兴媒体'], [8]],\n",
       " [['中央厨房'], [7]],\n",
       " [['两会报道'], [7]],\n",
       " [['数字出版'], [7]],\n",
       " [['智能化'], [7]],\n",
       " [['新型主流媒体'], [7]],\n",
       " [['趋势'], [7]],\n",
       " [['深度融合'], [6]],\n",
       " [['内容生产'], [6]],\n",
       " [['大数据时代'], [6]],\n",
       " [['智媒体'], [6]],\n",
       " [['融媒体'], [5]],\n",
       " [['广电媒体'], [5]],\n",
       " [['新媒体产品'], [5]],\n",
       " [['融合转型'], [5]],\n",
       " [['县级融媒体中心'], [5]],\n",
       " [['舆论引导'], [5]],\n",
       " [['县级融媒体'], [5]],\n",
       " [['云计算'], [5]],\n",
       " [['智慧广电'], [5]],\n",
       " [['媒介融合'], [5]],\n",
       " [['移动互联网'], [5]],\n",
       " [['创新'], [5]],\n",
       " [['移动端'], [4]],\n",
       " [['知识服务'], [4]],\n",
       " [['智能化媒体'], [4]],\n",
       " [['广播电视'], [4]],\n",
       " [['广播电视台'], [4]],\n",
       " [['新意'], [4]],\n",
       " [['互联网治理'], [4]],\n",
       " [['科技期刊'], [4]],\n",
       " [['热点'], [4]],\n",
       " [['技术平台'], [4]],\n",
       " [['习近平总书记'], [4]],\n",
       " [['浙报集团'], [4]],\n",
       " [['政务服务'], [4]],\n",
       " [['转型'], [4]],\n",
       " [['今日头条'], [4]],\n",
       " [['互联网'], [4]],\n",
       " [['中国传媒'], [4]],\n",
       " [['移动传播'], [4]],\n",
       " [['数据新闻'], [4]],\n",
       " [['媒体大脑'], [4]],\n",
       " [['精准传播'], [4]],\n",
       " [['盈利模式'], [4]],\n",
       " [['媒体生态'], [3]],\n",
       " [['网络舆情'], [3]],\n",
       " [['数字媒体'], [3]],\n",
       " [['智慧城市'], [3]],\n",
       " [['融媒体产品'], [3]],\n",
       " [['大众报业集团'], [3]],\n",
       " [['移动化'], [3]],\n",
       " [['新媒体平台'], [3]],\n",
       " [['新技术'], [3]],\n",
       " [['智慧传播'], [3]],\n",
       " [['传统主流媒体'], [3]],\n",
       " [['学术期刊'], [3]],\n",
       " [['智慧化'], [3]],\n",
       " [['“互联网+”'], [3]],\n",
       " [['智慧社区'], [3]],\n",
       " [['人工智能时代'], [3]],\n",
       " [['编辑部'], [3]],\n",
       " [['未来媒体'], [3]],\n",
       " [['智能传播'], [3]],\n",
       " [['融合传播'], [3]],\n",
       " [['融合发展'], [3]],\n",
       " [['全媒体传播'], [3]],\n",
       " [['平台型媒体'], [3]],\n",
       " [['传播效果'], [3]],\n",
       " [['平台建设'], [3]],\n",
       " [['新闻业'], [3]],\n",
       " [['疫情防控'], [3]],\n",
       " [['新生态'], [3]],\n",
       " [['生产与传播'], [3]],\n",
       " [['数据驱动'], [3]],\n",
       " [['舆论'], [3]],\n",
       " [['自媒体'], [3]],\n",
       " [['移动直播'], [3]],\n",
       " [['媒体平台'], [3]],\n",
       " [['传统媒体转型'], [3]],\n",
       " [['全媒体平台'], [3]],\n",
       " [['新模式'], [3]],\n",
       " [['路径'], [3]],\n",
       " [['智媒时代'], [3]],\n",
       " [['移动优先'], [3]],\n",
       " [['Facebook'], [2]],\n",
       " [['传播机制'], [2]],\n",
       " [['互联网媒体'], [2]],\n",
       " [['电视节目'], [2]],\n",
       " [['战略合作协议'], [2]],\n",
       " [['传媒产业'], [2]],\n",
       " [['“内容为王”'], [2]],\n",
       " [['大数据技术'], [2]],\n",
       " [['采编流程'], [2]],\n",
       " [['数字化转型'], [2]],\n",
       " [['浙江日报'], [2]],\n",
       " [['智能技术'], [2]],\n",
       " [['社会治理'], [2]],\n",
       " [['新媒体技术'], [2]],\n",
       " [['网络空间命运共同体'], [2]],\n",
       " [['区块链技术'], [2]],\n",
       " [['智媒'], [2]],\n",
       " [['用户数据'], [2]],\n",
       " [['信息革命'], [2]],\n",
       " [['新媒体人才'], [2]],\n",
       " [['国家新战略'], [2]],\n",
       " [['创新路径'], [2]],\n",
       " [['“中央厨房”'], [2]],\n",
       " [['商业模式'], [2]],\n",
       " [['技术逻辑'], [2]],\n",
       " [['高校'], [2]],\n",
       " [['智能服务'], [2]],\n",
       " [['技术融合'], [2]],\n",
       " [['实践路径'], [2]],\n",
       " [['发展之路'], [2]],\n",
       " [['数据化'], [2]],\n",
       " [['新闻生产方式'], [2]],\n",
       " [['全媒体融合'], [2]],\n",
       " [['线上线下'], [2]],\n",
       " [['媒体报道'], [2]],\n",
       " [['新业态'], [2]],\n",
       " [['客户端'], [2]],\n",
       " [['大数据分析'], [2]],\n",
       " [['研究热点'], [2]],\n",
       " [['泛娱乐'], [2]],\n",
       " [['传播方式'], [2]],\n",
       " [['资本'], [2]],\n",
       " [['媒体建设'], [2]],\n",
       " [['新媒体产业'], [2]],\n",
       " [['广播'], [2]],\n",
       " [['APP'], [2]],\n",
       " [['新闻采编'], [2]],\n",
       " [['电视媒体'], [2]],\n",
       " [['党的创新理论'], [2]],\n",
       " [['网络'], [2]],\n",
       " [['技术驱动'], [2]],\n",
       " [['社交平台'], [2]],\n",
       " [['媒体智能化'], [2]],\n",
       " [['手机报'], [2]],\n",
       " [['社会化媒体'], [2]],\n",
       " [['数字中国'], [2]],\n",
       " [['深圳报业集团'], [2]],\n",
       " [['新型媒体'], [2]],\n",
       " [['融合'], [2]],\n",
       " [['媒体行业'], [2]],\n",
       " [['网络安全审查'], [2]],\n",
       " [['运营'], [2]],\n",
       " [['云计算技术'], [2]],\n",
       " [['时政报道'], [2]],\n",
       " [['移动终端'], [2]],\n",
       " [['发展趋势'], [2]],\n",
       " [['央视新闻移动网'], [2]],\n",
       " [['2017年'], [2]],\n",
       " [['广电传媒'], [2]],\n",
       " [['平台化'], [2]],\n",
       " [['网络安全'], [2]],\n",
       " [['中国媒体融合云'], [2]],\n",
       " [['创新实践'], [2]],\n",
       " [['粉丝经济'], [2]],\n",
       " [['发展前瞻'], [2]],\n",
       " [['县级媒体'], [2]],\n",
       " [['策略'], [2]],\n",
       " [['媒体产业'], [2]],\n",
       " [['总编辑'], [2]],\n",
       " [['民生服务'], [2]],\n",
       " [['报业转型'], [2]],\n",
       " [['供给侧改革'], [2]],\n",
       " [['媒体业'], [2]],\n",
       " [['组织架构'], [2]],\n",
       " [['网络直播'], [2]],\n",
       " [['《中国舆论场》'], [2]],\n",
       " [['地市级媒体'], [2]],\n",
       " [['社交化'], [2]],\n",
       " [['内容为王'], [2]],\n",
       " [['四川日报'], [2]],\n",
       " [['全产业链'], [2]],\n",
       " [['智媒化'], [2]],\n",
       " [['数字阅读'], [2]],\n",
       " [['纸质报纸'], [2]],\n",
       " [['新闻舆论工作'], [2]],\n",
       " [['传媒集团'], [2]],\n",
       " [['新趋势'], [2]],\n",
       " [['可持续发展'], [2]],\n",
       " [['大数据中心'], [2]],\n",
       " [['治理'], [2]],\n",
       " [['新闻报道'], [2]],\n",
       " [['新闻传播'], [2]],\n",
       " [['新基建'], [2]],\n",
       " [['省级党报'], [2]],\n",
       " [['媒体融合时代'], [2]],\n",
       " [['传播理念'], [2]],\n",
       " [['应用场景'], [2]],\n",
       " [['国家治理'], [2]],\n",
       " [['科学教育活动'], [1]],\n",
       " [['杭州日报'], [1]],\n",
       " [['主持人'], [1]],\n",
       " [['全媒体化'], [1]],\n",
       " [['党刊'], [1]],\n",
       " [['原创新闻'], [1]],\n",
       " [['出版项目'], [1]],\n",
       " [['第四季度'], [1]],\n",
       " [['借壳上市'], [1]],\n",
       " [['技术支撑'], [1]],\n",
       " [['编辑校对'], [1]],\n",
       " [['文化产业'], [1]],\n",
       " [['联动机制'], [1]],\n",
       " [['《纽约时报》'], [1]],\n",
       " [['移动媒体'], [1]],\n",
       " [['微类型'], [1]],\n",
       " [['战略转型'], [1]],\n",
       " [['出版科学性'], [1]],\n",
       " [['《广州日报》'], [1]],\n",
       " [['数据引擎'], [1]],\n",
       " [['统计职能'], [1]],\n",
       " [['县级融媒中心'], [1]],\n",
       " [['4I'], [1]],\n",
       " [['挑战'], [1]],\n",
       " [['新媒体广告'], [1]],\n",
       " [['语言技术'], [1]],\n",
       " [['人民日报媒体融合'], [1]],\n",
       " [['垂直细分'], [1]],\n",
       " [['功能作用'], [1]],\n",
       " [['云技术'], [1]],\n",
       " [['伦理冲突'], [1]],\n",
       " [['非正式学习'], [1]],\n",
       " [['新兴媒介'], [1]],\n",
       " [['新媒体发展'], [1]],\n",
       " [['广播电视集团'], [1]],\n",
       " [['新华全媒头条'], [1]],\n",
       " [['《河南日报》'], [1]],\n",
       " [['着力点'], [1]],\n",
       " [['碎片化'], [1]],\n",
       " [['数字逻辑'], [1]],\n",
       " [['新闻舆论'], [1]],\n",
       " [['发展策略'], [1]],\n",
       " [['浙报传媒'], [1]],\n",
       " [['发展'], [1]],\n",
       " [['新征程'], [1]],\n",
       " [['思考与探索'], [1]],\n",
       " [['模式重构'], [1]],\n",
       " [['信息传播活动'], [1]],\n",
       " [['数字版权'], [1]],\n",
       " [['文献情报中心'], [1]],\n",
       " [['私域流量'], [1]],\n",
       " [['融合评估'], [1]],\n",
       " [['上游新闻'], [1]],\n",
       " [['融合出版'], [1]],\n",
       " [['流程再造'], [1]],\n",
       " [['媒立方'], [1]],\n",
       " [['反馈机制'], [1]],\n",
       " [['新闻从业者'], [1]],\n",
       " [['运营模式'], [1]],\n",
       " [['广播电视产业'], [1]],\n",
       " [['理论创新'], [1]],\n",
       " [['华龙网'], [1]],\n",
       " [['融合新闻'], [1]],\n",
       " [['世界互联网大会'], [1]],\n",
       " [['即时传播'], [1]],\n",
       " [['互动式反馈'], [1]],\n",
       " [['关键信息基础设施'], [1]],\n",
       " [['资源建设'], [1]],\n",
       " [['中国媒介融合20年'], [1]],\n",
       " [['消费者'], [1]],\n",
       " [['新路径'], [1]],\n",
       " [['网络剧'], [1]],\n",
       " [['产品创新'], [1]],\n",
       " [['市场竞合'], [1]],\n",
       " [['江西省'], [1]],\n",
       " [['第三平台'], [1]],\n",
       " [['品牌项目'], [1]],\n",
       " [['内部孵化'], [1]],\n",
       " [['新闻传播人才'], [1]],\n",
       " [['综合服务能力'], [1]],\n",
       " [['创新案例'], [1]],\n",
       " [['湖北省'], [1]],\n",
       " [['网络视听'], [1]],\n",
       " [['改革创新'], [1]],\n",
       " [['传媒政策'], [1]],\n",
       " [['创新现状'], [1]],\n",
       " [['专题调研'], [1]],\n",
       " [['出版传媒集团'], [1]],\n",
       " [['参与性'], [1]],\n",
       " [['官方微博'], [1]],\n",
       " [['邳州模式'], [1]],\n",
       " [['《贵州商报》'], [1]],\n",
       " [['南华早报'], [1]],\n",
       " [['发展研究'], [1]],\n",
       " [['媒体传播'], [1]],\n",
       " [['智能转向5G'], [1]],\n",
       " [['信道测量'], [1]],\n",
       " [['媒体创新'], [1]],\n",
       " [['四则运算'], [1]],\n",
       " [['区块链'], [1]],\n",
       " [['美国大选的传播方式'], [1]],\n",
       " [['互联网趋势'], [1]],\n",
       " [['乡村振兴'], [1]],\n",
       " [['中国报业协会'], [1]],\n",
       " [['网络意识形态'], [1]],\n",
       " [['具身思维'], [1]],\n",
       " [['组织生态'], [1]],\n",
       " [['5G时代'], [1]],\n",
       " [['互联时代'], [1]],\n",
       " [['数字内容'], [1]],\n",
       " [['信息'], [1]],\n",
       " [['安全意识'], [1]],\n",
       " [['新闻融合'], [1]],\n",
       " [['抢抓机遇'], [1]],\n",
       " [['广播电视总局'], [1]],\n",
       " [['互联网基因'], [1]],\n",
       " [['区块链媒体'], [1]],\n",
       " [['大数据资源'], [1]],\n",
       " [['媒介融合报道'], [1]],\n",
       " [['年轻态'], [1]],\n",
       " [['4K'], [1]],\n",
       " [['传媒伦理'], [1]],\n",
       " [['绩效考核'], [1]],\n",
       " [['数据库'], [1]],\n",
       " [['优化创新'], [1]],\n",
       " [['流浪地球'], [1]],\n",
       " [['“十三五”展望'], [1]],\n",
       " [['党的十九大'], [1]],\n",
       " [['新闻专业精神'], [1]],\n",
       " [['问题特点'], [1]],\n",
       " [['出版融合'], [1]],\n",
       " [['党管媒体'], [1]],\n",
       " [['AI合成主播'], [1]],\n",
       " [['社会治理体系'], [1]],\n",
       " [['事后管理'], [1]],\n",
       " [['新闻出版单位'], [1]],\n",
       " [['传播'], [1]],\n",
       " [['联合重组'], [1]],\n",
       " [['分众化传播'], [1]],\n",
       " [['用户响应'], [1]],\n",
       " [['抗日剧'], [1]],\n",
       " [['建设路径'], [1]],\n",
       " [['生态型'], [1]],\n",
       " [['互联网应用'], [1]],\n",
       " [['突破'], [1]],\n",
       " [['天河机场'], [1]],\n",
       " [['媒体数据'], [1]],\n",
       " [['社交媒体'], [1]],\n",
       " [['全景视频'], [1]],\n",
       " [['智慧阅读'], [1]],\n",
       " [['少儿出版'], [1]],\n",
       " [['山东省'], [1]],\n",
       " [['工具理性'], [1]],\n",
       " [['付费墙'], [1]],\n",
       " [['互联网技术'], [1]],\n",
       " [['城市广电'], [1]],\n",
       " [['信息技术逻辑'], [1]],\n",
       " [['移动社交'], [1]],\n",
       " [['发行量'], [1]],\n",
       " [['在线化'], [1]],\n",
       " [['传媒发展'], [1]],\n",
       " [['聊天机器人'], [1]],\n",
       " [['大数据驱动'], [1]],\n",
       " [['移动出版'], [1]],\n",
       " [['采制主体'], [1]],\n",
       " [['融媒云厨'], [1]],\n",
       " [['共享数据'], [1]],\n",
       " [['服务型统计'], [1]],\n",
       " [['技术智能性'], [1]],\n",
       " [['专题博物馆'], [1]],\n",
       " [['新闻'], [1]],\n",
       " [['微信小程序'], [1]],\n",
       " [['可视化报道'], [1]],\n",
       " [['算法黑箱'], [1]],\n",
       " [['移动电视'], [1]],\n",
       " [['传播形态'], [1]],\n",
       " [['传媒人才'], [1]],\n",
       " [['差异化传播'], [1]],\n",
       " [['先导地位'], [1]],\n",
       " [['内容交互'], [1]],\n",
       " [['中国蓝云'], [1]],\n",
       " [['研究院'], [1]],\n",
       " [['移动阅读'], [1]],\n",
       " [['全媒化'], [1]],\n",
       " [['智能出版'], [1]],\n",
       " [['推进作用'], [1]],\n",
       " [['标准体系建设'], [1]],\n",
       " [['用户至上'], [1]],\n",
       " [['智慧旅游'], [1]],\n",
       " [['数字版权保护技术'], [1]],\n",
       " [['校园新闻'], [1]],\n",
       " [['思政课'], [1]],\n",
       " [['受众反馈'], [1]],\n",
       " [['传播体系'], [1]],\n",
       " [['资本对接'], [1]],\n",
       " [['数字化趋势'], [1]],\n",
       " [['语境变迁'], [1]],\n",
       " [['新时期'], [1]],\n",
       " [['党管数据'], [1]],\n",
       " [['生态文明贵阳国际论坛'], [1]],\n",
       " [['创新发展'], [1]],\n",
       " [['互联网公司'], [1]],\n",
       " [['演播室'], [1]],\n",
       " [['原创内容'], [1]],\n",
       " [['广告业'], [1]],\n",
       " [['人工智能教育'], [1]],\n",
       " [['电视新闻'], [1]],\n",
       " [['状况调查'], [1]],\n",
       " [['传统图书出版'], [1]],\n",
       " [['科技档案'], [1]],\n",
       " [['广电新媒体'], [1]],\n",
       " [['技术解决方案'], [1]],\n",
       " [['电视传播'], [1]],\n",
       " [['传媒业生态'], [1]],\n",
       " [['数据'], [1]],\n",
       " [['新闻管理'], [1]],\n",
       " [['媒体经营'], [1]],\n",
       " [['信息化+商业模式'], [1]],\n",
       " [['领域应用'], [1]],\n",
       " [['智能推送'], [1]],\n",
       " [['独创性'], [1]],\n",
       " [['价值维度'], [1]],\n",
       " [['数据服务'], [1]],\n",
       " [['平台'], [1]],\n",
       " [['整合'], [1]],\n",
       " [['新闻传播领域'], [1]],\n",
       " [['大数据思维'], [1]],\n",
       " [['“十四五”'], [1]],\n",
       " [['中共中央'], [1]],\n",
       " [['传统媒体广告'], [1]],\n",
       " [['网络视频'], [1]],\n",
       " [['传统电视'], [1]],\n",
       " [['产业'], [1]],\n",
       " [['新阶段'], [1]],\n",
       " [['生产者'], [1]],\n",
       " [['新甘肃云'], [1]],\n",
       " [['智慧融媒体'], [1]],\n",
       " [['电视广告'], [1]],\n",
       " [['习近平新时代中国特色社会主义思想'], [1]],\n",
       " [['智慧出版'], [1]],\n",
       " [['智力成果'], [1]],\n",
       " [['最新态势'], [1]],\n",
       " [['以用户为中心'], [1]],\n",
       " [['市场先机'], [1]],\n",
       " [['社会效益'], [1]],\n",
       " [['体验设计'], [1]],\n",
       " [['《赫芬顿邮报》'], [1]],\n",
       " [['机构改革'], [1]],\n",
       " [['华西都市报'], [1]],\n",
       " [['四川电视节'], [1]],\n",
       " [['弯道超车'], [1]],\n",
       " [['中国国际广播电台'], [1]],\n",
       " [['齐鲁晚报'], [1]],\n",
       " [['AI'], [1]],\n",
       " [['旅游业'], [1]],\n",
       " [['理论建构'], [1]],\n",
       " [['新闻价值挖掘'], [1]],\n",
       " [['地方广电媒体'], [1]],\n",
       " [['县级融媒'], [1]],\n",
       " [['智媒体时代'], [1]],\n",
       " [['二维码'], [1]],\n",
       " [['知识转化'], [1]],\n",
       " [['媒体时代'], [1]],\n",
       " [['VR新闻'], [1]],\n",
       " [['新高度'], [1]],\n",
       " [['公共服务平台'], [1]],\n",
       " [['融合报道'], [1]],\n",
       " [['可视化、短视频、学生青年、舆论'], [1]],\n",
       " [['突围路径'], [1]],\n",
       " [['内容传播'], [1]],\n",
       " [['数字电视产业'], [1]],\n",
       " [['风险防范'], [1]],\n",
       " [['深耕'], [1]],\n",
       " [['创投'], [1]],\n",
       " [['传播路径'], [1]],\n",
       " [['增值'], [1]],\n",
       " [['廊坊日报'], [1]],\n",
       " [['数据中心'], [1]],\n",
       " [['太仓市'], [1]],\n",
       " [['最终形态'], [1]],\n",
       " [['新闻资讯'], [1]],\n",
       " [['内容平台'], [1]],\n",
       " [['创意文化产业'], [1]],\n",
       " [['杭报集团'], [1]],\n",
       " [['媒体应用'], [1]],\n",
       " [['智库'], [1]],\n",
       " [['黏合性'], [1]],\n",
       " [['可穿戴'], [1]],\n",
       " [['新闻传播规律'], [1]],\n",
       " [['利润总额'], [1]],\n",
       " [['用户原创内容'], [1]],\n",
       " [['南方周末'], [1]],\n",
       " [['地方广播电视台'], [1]],\n",
       " [['产品'], [1]],\n",
       " [['格式规范'], [1]]]"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list_2014 = list3\n",
    "list_2014"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[['媒体融合'], [227]],\n",
       " [['人工智能'], [61]],\n",
       " [['大数据'], [41]],\n",
       " [['媒体融合发展'], [39]],\n",
       " [['传统媒体'], [29]],\n",
       " [['新媒体'], [20]],\n",
       " [['5G'], [16]],\n",
       " [['传媒业'], [12]],\n",
       " [['短视频'], [12]],\n",
       " [['互联网思维'], [11]],\n",
       " [['媒体深度融合'], [11]],\n",
       " [['主流媒体'], [10]],\n",
       " [['智能媒体'], [10]],\n",
       " [['人工智能技术'], [10]],\n",
       " [['报业集团'], [10]],\n",
       " [['全媒体'], [9]],\n",
       " [['新闻客户端'], [9]],\n",
       " [['人民日报'], [9]],\n",
       " [['新华社'], [9]],\n",
       " [['全国两会'], [8]],\n",
       " [['新兴媒体'], [8]],\n",
       " [['中央厨房'], [7]],\n",
       " [['两会报道'], [7]],\n",
       " [['数字出版'], [7]],\n",
       " [['智能化'], [7]],\n",
       " [['新型主流媒体'], [7]],\n",
       " [['趋势'], [7]],\n",
       " [['深度融合'], [6]],\n",
       " [['内容生产'], [6]],\n",
       " [['大数据时代'], [6]],\n",
       " [['智媒体'], [6]],\n",
       " [['融媒体'], [5]],\n",
       " [['广电媒体'], [5]],\n",
       " [['新媒体产品'], [5]],\n",
       " [['融合转型'], [5]],\n",
       " [['县级融媒体中心'], [5]],\n",
       " [['舆论引导'], [5]],\n",
       " [['县级融媒体'], [5]],\n",
       " [['云计算'], [5]],\n",
       " [['智慧广电'], [5]],\n",
       " [['媒介融合'], [5]],\n",
       " [['移动互联网'], [5]],\n",
       " [['创新'], [5]],\n",
       " [['移动端'], [4]],\n",
       " [['知识服务'], [4]],\n",
       " [['智能化媒体'], [4]],\n",
       " [['广播电视'], [4]],\n",
       " [['广播电视台'], [4]],\n",
       " [['新意'], [4]],\n",
       " [['互联网治理'], [4]],\n",
       " [['科技期刊'], [4]],\n",
       " [['热点'], [4]],\n",
       " [['技术平台'], [4]],\n",
       " [['习近平总书记'], [4]],\n",
       " [['浙报集团'], [4]],\n",
       " [['政务服务'], [4]],\n",
       " [['转型'], [4]],\n",
       " [['今日头条'], [4]],\n",
       " [['互联网'], [4]],\n",
       " [['中国传媒'], [4]],\n",
       " [['移动传播'], [4]],\n",
       " [['数据新闻'], [4]],\n",
       " [['媒体大脑'], [4]],\n",
       " [['精准传播'], [4]],\n",
       " [['盈利模式'], [4]],\n",
       " [['媒体生态'], [3]],\n",
       " [['网络舆情'], [3]],\n",
       " [['数字媒体'], [3]],\n",
       " [['智慧城市'], [3]],\n",
       " [['融媒体产品'], [3]],\n",
       " [['大众报业集团'], [3]],\n",
       " [['移动化'], [3]],\n",
       " [['新媒体平台'], [3]],\n",
       " [['新技术'], [3]],\n",
       " [['智慧传播'], [3]],\n",
       " [['传统主流媒体'], [3]],\n",
       " [['学术期刊'], [3]],\n",
       " [['智慧化'], [3]],\n",
       " [['“互联网+”'], [3]],\n",
       " [['智慧社区'], [3]],\n",
       " [['人工智能时代'], [3]],\n",
       " [['编辑部'], [3]],\n",
       " [['未来媒体'], [3]],\n",
       " [['智能传播'], [3]],\n",
       " [['融合传播'], [3]],\n",
       " [['融合发展'], [3]],\n",
       " [['全媒体传播'], [3]],\n",
       " [['平台型媒体'], [3]],\n",
       " [['传播效果'], [3]],\n",
       " [['平台建设'], [3]],\n",
       " [['新闻业'], [3]],\n",
       " [['疫情防控'], [3]],\n",
       " [['新生态'], [3]],\n",
       " [['生产与传播'], [3]],\n",
       " [['数据驱动'], [3]],\n",
       " [['舆论'], [3]],\n",
       " [['自媒体'], [3]],\n",
       " [['移动直播'], [3]],\n",
       " [['媒体平台'], [3]],\n",
       " [['传统媒体转型'], [3]],\n",
       " [['全媒体平台'], [3]],\n",
       " [['新模式'], [3]],\n",
       " [['路径'], [3]],\n",
       " [['智媒时代'], [3]],\n",
       " [['移动优先'], [3]],\n",
       " [['Facebook'], [2]],\n",
       " [['传播机制'], [2]],\n",
       " [['互联网媒体'], [2]],\n",
       " [['电视节目'], [2]],\n",
       " [['战略合作协议'], [2]],\n",
       " [['传媒产业'], [2]],\n",
       " [['“内容为王”'], [2]],\n",
       " [['大数据技术'], [2]],\n",
       " [['采编流程'], [2]],\n",
       " [['数字化转型'], [2]],\n",
       " [['浙江日报'], [2]],\n",
       " [['智能技术'], [2]],\n",
       " [['社会治理'], [2]],\n",
       " [['新媒体技术'], [2]],\n",
       " [['网络空间命运共同体'], [2]],\n",
       " [['区块链技术'], [2]],\n",
       " [['智媒'], [2]],\n",
       " [['用户数据'], [2]],\n",
       " [['信息革命'], [2]],\n",
       " [['新媒体人才'], [2]],\n",
       " [['国家新战略'], [2]],\n",
       " [['创新路径'], [2]],\n",
       " [['“中央厨房”'], [2]],\n",
       " [['商业模式'], [2]],\n",
       " [['技术逻辑'], [2]],\n",
       " [['高校'], [2]],\n",
       " [['智能服务'], [2]],\n",
       " [['技术融合'], [2]],\n",
       " [['实践路径'], [2]],\n",
       " [['发展之路'], [2]],\n",
       " [['数据化'], [2]],\n",
       " [['新闻生产方式'], [2]],\n",
       " [['全媒体融合'], [2]],\n",
       " [['线上线下'], [2]],\n",
       " [['媒体报道'], [2]],\n",
       " [['新业态'], [2]],\n",
       " [['客户端'], [2]],\n",
       " [['大数据分析'], [2]],\n",
       " [['研究热点'], [2]],\n",
       " [['泛娱乐'], [2]],\n",
       " [['传播方式'], [2]],\n",
       " [['资本'], [2]],\n",
       " [['媒体建设'], [2]],\n",
       " [['新媒体产业'], [2]],\n",
       " [['广播'], [2]],\n",
       " [['APP'], [2]],\n",
       " [['新闻采编'], [2]],\n",
       " [['电视媒体'], [2]],\n",
       " [['党的创新理论'], [2]],\n",
       " [['网络'], [2]],\n",
       " [['技术驱动'], [2]],\n",
       " [['社交平台'], [2]],\n",
       " [['媒体智能化'], [2]],\n",
       " [['手机报'], [2]],\n",
       " [['社会化媒体'], [2]],\n",
       " [['数字中国'], [2]],\n",
       " [['深圳报业集团'], [2]],\n",
       " [['新型媒体'], [2]],\n",
       " [['融合'], [2]],\n",
       " [['媒体行业'], [2]],\n",
       " [['网络安全审查'], [2]],\n",
       " [['运营'], [2]],\n",
       " [['云计算技术'], [2]],\n",
       " [['时政报道'], [2]],\n",
       " [['移动终端'], [2]],\n",
       " [['发展趋势'], [2]],\n",
       " [['央视新闻移动网'], [2]],\n",
       " [['2017年'], [2]],\n",
       " [['广电传媒'], [2]],\n",
       " [['平台化'], [2]],\n",
       " [['网络安全'], [2]],\n",
       " [['中国媒体融合云'], [2]],\n",
       " [['创新实践'], [2]],\n",
       " [['粉丝经济'], [2]],\n",
       " [['发展前瞻'], [2]],\n",
       " [['县级媒体'], [2]],\n",
       " [['策略'], [2]],\n",
       " [['媒体产业'], [2]],\n",
       " [['总编辑'], [2]],\n",
       " [['民生服务'], [2]],\n",
       " [['报业转型'], [2]],\n",
       " [['供给侧改革'], [2]],\n",
       " [['媒体业'], [2]],\n",
       " [['组织架构'], [2]],\n",
       " [['网络直播'], [2]],\n",
       " [['《中国舆论场》'], [2]],\n",
       " [['地市级媒体'], [2]],\n",
       " [['社交化'], [2]],\n",
       " [['内容为王'], [2]],\n",
       " [['四川日报'], [2]],\n",
       " [['全产业链'], [2]],\n",
       " [['智媒化'], [2]],\n",
       " [['数字阅读'], [2]],\n",
       " [['纸质报纸'], [2]],\n",
       " [['新闻舆论工作'], [2]],\n",
       " [['传媒集团'], [2]],\n",
       " [['新趋势'], [2]],\n",
       " [['可持续发展'], [2]],\n",
       " [['大数据中心'], [2]],\n",
       " [['治理'], [2]],\n",
       " [['新闻报道'], [2]],\n",
       " [['新闻传播'], [2]],\n",
       " [['新基建'], [2]],\n",
       " [['省级党报'], [2]],\n",
       " [['媒体融合时代'], [2]],\n",
       " [['传播理念'], [2]],\n",
       " [['应用场景'], [2]],\n",
       " [['国家治理'], [2]],\n",
       " [['科学教育活动'], [1]],\n",
       " [['杭州日报'], [1]],\n",
       " [['主持人'], [1]],\n",
       " [['全媒体化'], [1]],\n",
       " [['党刊'], [1]],\n",
       " [['原创新闻'], [1]],\n",
       " [['出版项目'], [1]],\n",
       " [['第四季度'], [1]],\n",
       " [['借壳上市'], [1]],\n",
       " [['技术支撑'], [1]],\n",
       " [['编辑校对'], [1]],\n",
       " [['文化产业'], [1]],\n",
       " [['联动机制'], [1]],\n",
       " [['《纽约时报》'], [1]],\n",
       " [['移动媒体'], [1]],\n",
       " [['微类型'], [1]],\n",
       " [['战略转型'], [1]],\n",
       " [['出版科学性'], [1]],\n",
       " [['《广州日报》'], [1]],\n",
       " [['数据引擎'], [1]],\n",
       " [['统计职能'], [1]],\n",
       " [['县级融媒中心'], [1]],\n",
       " [['4I'], [1]],\n",
       " [['挑战'], [1]],\n",
       " [['新媒体广告'], [1]],\n",
       " [['语言技术'], [1]],\n",
       " [['人民日报媒体融合'], [1]],\n",
       " [['垂直细分'], [1]],\n",
       " [['功能作用'], [1]],\n",
       " [['云技术'], [1]],\n",
       " [['伦理冲突'], [1]],\n",
       " [['非正式学习'], [1]],\n",
       " [['新兴媒介'], [1]],\n",
       " [['新媒体发展'], [1]],\n",
       " [['广播电视集团'], [1]],\n",
       " [['新华全媒头条'], [1]],\n",
       " [['《河南日报》'], [1]],\n",
       " [['着力点'], [1]],\n",
       " [['碎片化'], [1]],\n",
       " [['数字逻辑'], [1]],\n",
       " [['新闻舆论'], [1]],\n",
       " [['发展策略'], [1]],\n",
       " [['浙报传媒'], [1]],\n",
       " [['发展'], [1]],\n",
       " [['新征程'], [1]],\n",
       " [['思考与探索'], [1]],\n",
       " [['模式重构'], [1]],\n",
       " [['信息传播活动'], [1]],\n",
       " [['数字版权'], [1]],\n",
       " [['文献情报中心'], [1]],\n",
       " [['私域流量'], [1]],\n",
       " [['融合评估'], [1]],\n",
       " [['上游新闻'], [1]],\n",
       " [['融合出版'], [1]],\n",
       " [['流程再造'], [1]],\n",
       " [['媒立方'], [1]],\n",
       " [['反馈机制'], [1]],\n",
       " [['新闻从业者'], [1]],\n",
       " [['运营模式'], [1]],\n",
       " [['广播电视产业'], [1]],\n",
       " [['理论创新'], [1]],\n",
       " [['华龙网'], [1]],\n",
       " [['融合新闻'], [1]],\n",
       " [['世界互联网大会'], [1]],\n",
       " [['即时传播'], [1]],\n",
       " [['互动式反馈'], [1]],\n",
       " [['关键信息基础设施'], [1]],\n",
       " [['资源建设'], [1]],\n",
       " [['中国媒介融合20年'], [1]],\n",
       " [['消费者'], [1]],\n",
       " [['新路径'], [1]],\n",
       " [['网络剧'], [1]],\n",
       " [['产品创新'], [1]],\n",
       " [['市场竞合'], [1]],\n",
       " [['江西省'], [1]],\n",
       " [['第三平台'], [1]],\n",
       " [['品牌项目'], [1]],\n",
       " [['内部孵化'], [1]],\n",
       " [['新闻传播人才'], [1]],\n",
       " [['综合服务能力'], [1]],\n",
       " [['创新案例'], [1]],\n",
       " [['湖北省'], [1]],\n",
       " [['网络视听'], [1]],\n",
       " [['改革创新'], [1]],\n",
       " [['传媒政策'], [1]],\n",
       " [['创新现状'], [1]],\n",
       " [['专题调研'], [1]],\n",
       " [['出版传媒集团'], [1]],\n",
       " [['参与性'], [1]],\n",
       " [['官方微博'], [1]],\n",
       " [['邳州模式'], [1]],\n",
       " [['《贵州商报》'], [1]],\n",
       " [['南华早报'], [1]],\n",
       " [['发展研究'], [1]],\n",
       " [['媒体传播'], [1]],\n",
       " [['智能转向5G'], [1]],\n",
       " [['信道测量'], [1]],\n",
       " [['媒体创新'], [1]],\n",
       " [['四则运算'], [1]],\n",
       " [['区块链'], [1]],\n",
       " [['美国大选的传播方式'], [1]],\n",
       " [['互联网趋势'], [1]],\n",
       " [['乡村振兴'], [1]],\n",
       " [['中国报业协会'], [1]],\n",
       " [['网络意识形态'], [1]],\n",
       " [['具身思维'], [1]],\n",
       " [['组织生态'], [1]],\n",
       " [['5G时代'], [1]],\n",
       " [['互联时代'], [1]],\n",
       " [['数字内容'], [1]],\n",
       " [['信息'], [1]],\n",
       " [['安全意识'], [1]],\n",
       " [['新闻融合'], [1]],\n",
       " [['抢抓机遇'], [1]],\n",
       " [['广播电视总局'], [1]],\n",
       " [['互联网基因'], [1]],\n",
       " [['区块链媒体'], [1]],\n",
       " [['大数据资源'], [1]],\n",
       " [['媒介融合报道'], [1]],\n",
       " [['年轻态'], [1]],\n",
       " [['4K'], [1]],\n",
       " [['传媒伦理'], [1]],\n",
       " [['绩效考核'], [1]],\n",
       " [['数据库'], [1]],\n",
       " [['优化创新'], [1]],\n",
       " [['流浪地球'], [1]],\n",
       " [['“十三五”展望'], [1]],\n",
       " [['党的十九大'], [1]],\n",
       " [['新闻专业精神'], [1]],\n",
       " [['问题特点'], [1]],\n",
       " [['出版融合'], [1]],\n",
       " [['党管媒体'], [1]],\n",
       " [['AI合成主播'], [1]],\n",
       " [['社会治理体系'], [1]],\n",
       " [['事后管理'], [1]],\n",
       " [['新闻出版单位'], [1]],\n",
       " [['传播'], [1]],\n",
       " [['联合重组'], [1]],\n",
       " [['分众化传播'], [1]],\n",
       " [['用户响应'], [1]],\n",
       " [['抗日剧'], [1]],\n",
       " [['建设路径'], [1]],\n",
       " [['生态型'], [1]],\n",
       " [['互联网应用'], [1]],\n",
       " [['突破'], [1]],\n",
       " [['天河机场'], [1]],\n",
       " [['媒体数据'], [1]],\n",
       " [['社交媒体'], [1]],\n",
       " [['全景视频'], [1]],\n",
       " [['智慧阅读'], [1]],\n",
       " [['少儿出版'], [1]],\n",
       " [['山东省'], [1]],\n",
       " [['工具理性'], [1]],\n",
       " [['付费墙'], [1]],\n",
       " [['互联网技术'], [1]],\n",
       " [['城市广电'], [1]],\n",
       " [['信息技术逻辑'], [1]],\n",
       " [['移动社交'], [1]],\n",
       " [['发行量'], [1]],\n",
       " [['在线化'], [1]],\n",
       " [['传媒发展'], [1]],\n",
       " [['聊天机器人'], [1]],\n",
       " [['大数据驱动'], [1]],\n",
       " [['移动出版'], [1]],\n",
       " [['采制主体'], [1]],\n",
       " [['融媒云厨'], [1]],\n",
       " [['共享数据'], [1]],\n",
       " [['服务型统计'], [1]],\n",
       " [['技术智能性'], [1]],\n",
       " [['专题博物馆'], [1]],\n",
       " [['新闻'], [1]],\n",
       " [['微信小程序'], [1]],\n",
       " [['可视化报道'], [1]],\n",
       " [['算法黑箱'], [1]],\n",
       " [['移动电视'], [1]],\n",
       " [['传播形态'], [1]],\n",
       " [['传媒人才'], [1]],\n",
       " [['差异化传播'], [1]],\n",
       " [['先导地位'], [1]],\n",
       " [['内容交互'], [1]],\n",
       " [['中国蓝云'], [1]],\n",
       " [['研究院'], [1]],\n",
       " [['移动阅读'], [1]],\n",
       " [['全媒化'], [1]],\n",
       " [['智能出版'], [1]],\n",
       " [['推进作用'], [1]],\n",
       " [['标准体系建设'], [1]],\n",
       " [['用户至上'], [1]],\n",
       " [['智慧旅游'], [1]],\n",
       " [['数字版权保护技术'], [1]],\n",
       " [['校园新闻'], [1]],\n",
       " [['思政课'], [1]],\n",
       " [['受众反馈'], [1]],\n",
       " [['传播体系'], [1]],\n",
       " [['资本对接'], [1]],\n",
       " [['数字化趋势'], [1]],\n",
       " [['语境变迁'], [1]],\n",
       " [['新时期'], [1]],\n",
       " [['党管数据'], [1]],\n",
       " [['生态文明贵阳国际论坛'], [1]],\n",
       " [['创新发展'], [1]],\n",
       " [['互联网公司'], [1]],\n",
       " [['演播室'], [1]],\n",
       " [['原创内容'], [1]],\n",
       " [['广告业'], [1]],\n",
       " [['人工智能教育'], [1]],\n",
       " [['电视新闻'], [1]],\n",
       " [['状况调查'], [1]],\n",
       " [['传统图书出版'], [1]],\n",
       " [['科技档案'], [1]],\n",
       " [['广电新媒体'], [1]],\n",
       " [['技术解决方案'], [1]],\n",
       " [['电视传播'], [1]],\n",
       " [['传媒业生态'], [1]],\n",
       " [['数据'], [1]],\n",
       " [['新闻管理'], [1]],\n",
       " [['媒体经营'], [1]],\n",
       " [['信息化+商业模式'], [1]],\n",
       " [['领域应用'], [1]],\n",
       " [['智能推送'], [1]],\n",
       " [['独创性'], [1]],\n",
       " [['价值维度'], [1]],\n",
       " [['数据服务'], [1]],\n",
       " [['平台'], [1]],\n",
       " [['整合'], [1]],\n",
       " [['新闻传播领域'], [1]],\n",
       " [['大数据思维'], [1]],\n",
       " [['“十四五”'], [1]],\n",
       " [['中共中央'], [1]],\n",
       " [['传统媒体广告'], [1]],\n",
       " [['网络视频'], [1]],\n",
       " [['传统电视'], [1]],\n",
       " [['产业'], [1]],\n",
       " [['新阶段'], [1]],\n",
       " [['生产者'], [1]],\n",
       " [['新甘肃云'], [1]],\n",
       " [['智慧融媒体'], [1]],\n",
       " [['电视广告'], [1]],\n",
       " [['习近平新时代中国特色社会主义思想'], [1]],\n",
       " [['智慧出版'], [1]],\n",
       " [['智力成果'], [1]],\n",
       " [['最新态势'], [1]],\n",
       " [['以用户为中心'], [1]],\n",
       " [['市场先机'], [1]],\n",
       " [['社会效益'], [1]],\n",
       " [['体验设计'], [1]],\n",
       " [['《赫芬顿邮报》'], [1]],\n",
       " [['机构改革'], [1]],\n",
       " [['华西都市报'], [1]],\n",
       " [['四川电视节'], [1]],\n",
       " [['弯道超车'], [1]],\n",
       " [['中国国际广播电台'], [1]],\n",
       " [['齐鲁晚报'], [1]],\n",
       " [['AI'], [1]],\n",
       " [['旅游业'], [1]],\n",
       " [['理论建构'], [1]],\n",
       " [['新闻价值挖掘'], [1]],\n",
       " [['地方广电媒体'], [1]],\n",
       " [['县级融媒'], [1]],\n",
       " [['智媒体时代'], [1]],\n",
       " [['二维码'], [1]],\n",
       " [['知识转化'], [1]],\n",
       " [['媒体时代'], [1]],\n",
       " [['VR新闻'], [1]],\n",
       " [['新高度'], [1]],\n",
       " [['公共服务平台'], [1]],\n",
       " [['融合报道'], [1]],\n",
       " [['可视化、短视频、学生青年、舆论'], [1]],\n",
       " [['突围路径'], [1]],\n",
       " [['内容传播'], [1]],\n",
       " [['数字电视产业'], [1]],\n",
       " [['风险防范'], [1]],\n",
       " [['深耕'], [1]],\n",
       " [['创投'], [1]],\n",
       " [['传播路径'], [1]],\n",
       " [['增值'], [1]],\n",
       " [['廊坊日报'], [1]],\n",
       " [['数据中心'], [1]],\n",
       " [['太仓市'], [1]],\n",
       " [['最终形态'], [1]],\n",
       " [['新闻资讯'], [1]],\n",
       " [['内容平台'], [1]],\n",
       " [['创意文化产业'], [1]],\n",
       " [['杭报集团'], [1]],\n",
       " [['媒体应用'], [1]],\n",
       " [['智库'], [1]],\n",
       " [['黏合性'], [1]],\n",
       " [['可穿戴'], [1]],\n",
       " [['新闻传播规律'], [1]],\n",
       " [['利润总额'], [1]],\n",
       " [['用户原创内容'], [1]],\n",
       " [['南方周末'], [1]],\n",
       " [['地方广播电视台'], [1]],\n",
       " [['产品'], [1]],\n",
       " [['格式规范'], [1]]]"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list_2014 = time_and_keyword(\"2014\")\n",
    "list_2014"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2015 = time_and_keyword(\"2015\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2016 = time_and_keyword(\"2016\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2017 = time_and_keyword(\"2017\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2018 = time_and_keyword(\"2018\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2019 = time_and_keyword(\"2019\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2020 = time_and_keyword(\"2020\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2021 = time_and_keyword(\"2021\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['媒体融合'],\n",
       " [31],\n",
       " ['大数据'],\n",
       " [13],\n",
       " ['媒体融合发展'],\n",
       " [7],\n",
       " ['新媒体'],\n",
       " [6],\n",
       " ['传统媒体'],\n",
       " [5],\n",
       " ['互联网思维'],\n",
       " [5],\n",
       " ['大数据时代'],\n",
       " [4],\n",
       " ['新型主流媒体'],\n",
       " [3],\n",
       " ['数字出版'],\n",
       " [3],\n",
       " ['传媒业'],\n",
       " [2],\n",
       " ['新华社'],\n",
       " [2],\n",
       " ['电视节目'],\n",
       " [2],\n",
       " ['融合发展'],\n",
       " [2],\n",
       " ['媒体融合时代'],\n",
       " [2],\n",
       " ['编辑部'],\n",
       " [2],\n",
       " ['电视媒体'],\n",
       " [2],\n",
       " ['社交平台'],\n",
       " [1],\n",
       " ['《人民日报》与新媒体'],\n",
       " [1],\n",
       " ['精准出版'],\n",
       " [1],\n",
       " ['问题及对策'],\n",
       " [1],\n",
       " ['新闻策划'],\n",
       " [1],\n",
       " ['数字媒体'],\n",
       " [1],\n",
       " ['官方微博'],\n",
       " [1],\n",
       " ['地方广电媒体'],\n",
       " [1],\n",
       " ['反馈机制'],\n",
       " [1],\n",
       " ['大数据资源'],\n",
       " [1],\n",
       " ['四川电视节'],\n",
       " [1],\n",
       " ['创投'],\n",
       " [1],\n",
       " ['《互动》'],\n",
       " [1],\n",
       " ['事业部'],\n",
       " [1],\n",
       " ['互联网'],\n",
       " [1],\n",
       " ['转型发展'],\n",
       " [1],\n",
       " ['数据融合'],\n",
       " [1],\n",
       " ['智慧城市'],\n",
       " [1],\n",
       " ['天安门广场'],\n",
       " [1],\n",
       " ['媒体数据'],\n",
       " [1],\n",
       " ['新闻从业者'],\n",
       " [1],\n",
       " ['转型'],\n",
       " [1],\n",
       " ['趋势'],\n",
       " [1],\n",
       " ['播放量'],\n",
       " [1],\n",
       " ['盈利模式'],\n",
       " [1],\n",
       " ['突围路径'],\n",
       " [1],\n",
       " ['媒体公共服务'],\n",
       " [1],\n",
       " ['电子商务'],\n",
       " [1],\n",
       " ['聚合'],\n",
       " [1],\n",
       " ['网络直播'],\n",
       " [1],\n",
       " ['传播方式'],\n",
       " [1],\n",
       " ['传统档案'],\n",
       " [1],\n",
       " ['新媒介'],\n",
       " [1],\n",
       " ['媒体生态'],\n",
       " [1],\n",
       " ['《河南日报》'],\n",
       " [1],\n",
       " ['微平台'],\n",
       " [1],\n",
       " ['“微”传播'],\n",
       " [1],\n",
       " ['浙报集团'],\n",
       " [1],\n",
       " ['收视率'],\n",
       " [1],\n",
       " ['传统图书出版'],\n",
       " [1],\n",
       " ['生态变革'],\n",
       " [1],\n",
       " ['资本对接'],\n",
       " [1],\n",
       " ['新闻客户端'],\n",
       " [1],\n",
       " ['新兴媒体'],\n",
       " [1],\n",
       " ['创新'],\n",
       " [1],\n",
       " ['电视节目网络影响力'],\n",
       " [1],\n",
       " ['客户端'],\n",
       " [1],\n",
       " ['信息闭环'],\n",
       " [1],\n",
       " ['互联网趋势'],\n",
       " [1],\n",
       " ['传统出版业'],\n",
       " [1],\n",
       " ['媒介研究'],\n",
       " [1],\n",
       " ['舆论引导'],\n",
       " [1],\n",
       " ['生态型'],\n",
       " [1],\n",
       " ['项目编号'],\n",
       " [1],\n",
       " ['收视行为'],\n",
       " [1],\n",
       " ['移动传播'],\n",
       " [1],\n",
       " ['新闻聚合'],\n",
       " [1],\n",
       " ['信息'],\n",
       " [1],\n",
       " ['新奥特'],\n",
       " [1],\n",
       " ['4G'],\n",
       " [1],\n",
       " ['公共服务平台'],\n",
       " [1],\n",
       " ['金融资本'],\n",
       " [1],\n",
       " ['中央厨房'],\n",
       " [1],\n",
       " ['媒体时代'],\n",
       " [1],\n",
       " ['API'],\n",
       " [1],\n",
       " ['互联网公司'],\n",
       " [1],\n",
       " ['大数据驱动'],\n",
       " [1],\n",
       " ['数据库'],\n",
       " [1],\n",
       " ['高校校报'],\n",
       " [1],\n",
       " ['受众反馈'],\n",
       " [1],\n",
       " ['农业媒体'],\n",
       " [1],\n",
       " ['融合途径'],\n",
       " [1],\n",
       " ['杭报集团'],\n",
       " [1],\n",
       " ['新闻生产方式'],\n",
       " [1],\n",
       " ['精准传播'],\n",
       " [1],\n",
       " ['主持人'],\n",
       " [1],\n",
       " ['大数据应用'],\n",
       " [1],\n",
       " ['数据新闻报道'],\n",
       " [1],\n",
       " ['国家数字复合出版系统工程'],\n",
       " [1],\n",
       " ['杭州日报'],\n",
       " [1],\n",
       " ['思考与探索'],\n",
       " [1],\n",
       " ['大数据营销'],\n",
       " [1],\n",
       " ['产业互联网'],\n",
       " [1],\n",
       " ['数字报业'],\n",
       " [1],\n",
       " ['新媒体发展'],\n",
       " [1],\n",
       " ['网络监督'],\n",
       " [1],\n",
       " ['手机报'],\n",
       " [1],\n",
       " ['新闻出版单位'],\n",
       " [1],\n",
       " ['演播室'],\n",
       " [1],\n",
       " ['开放性'],\n",
       " [1],\n",
       " ['数据平台'],\n",
       " [1],\n",
       " ['社会化媒体'],\n",
       " [1],\n",
       " ['著作权'],\n",
       " [1],\n",
       " ['全国两会'],\n",
       " [1],\n",
       " ['大数据分析'],\n",
       " [1],\n",
       " ['纸质报纸'],\n",
       " [1],\n",
       " ['场景理论'],\n",
       " [1],\n",
       " ['电视新闻'],\n",
       " [1],\n",
       " ['手机传播'],\n",
       " [1],\n",
       " ['新民晚报'],\n",
       " [1],\n",
       " ['传统媒体广告'],\n",
       " [1],\n",
       " ['平台'],\n",
       " [1],\n",
       " ['传播'],\n",
       " [1],\n",
       " ['互联网媒体'],\n",
       " [1],\n",
       " ['OTO'],\n",
       " [1],\n",
       " ['新闻媒体'],\n",
       " [1],\n",
       " ['移动互联网'],\n",
       " [1],\n",
       " ['数字报纸'],\n",
       " [1],\n",
       " ['新闻媒介'],\n",
       " [1],\n",
       " ['问题'],\n",
       " [1],\n",
       " ['媒体管理'],\n",
       " [1],\n",
       " ['主流媒体'],\n",
       " [1],\n",
       " ['单向传播'],\n",
       " [1],\n",
       " ['大数据技术'],\n",
       " [1],\n",
       " ['全媒体报道'],\n",
       " [1],\n",
       " ['人民日报'],\n",
       " [1],\n",
       " ['全景视频'],\n",
       " [1],\n",
       " ['资本'],\n",
       " [1],\n",
       " ['互动式反馈'],\n",
       " [1],\n",
       " ['云计算'],\n",
       " [1],\n",
       " ['“内容为王”'],\n",
       " [1],\n",
       " ['增值'],\n",
       " [1],\n",
       " ['广电传媒'],\n",
       " [1],\n",
       " ['着力点'],\n",
       " [1],\n",
       " ['河南日报报业集团'],\n",
       " [1],\n",
       " ['融合转型'],\n",
       " [1],\n",
       " ['问题及策略'],\n",
       " [1],\n",
       " ['发布渠道'],\n",
       " [1],\n",
       " ['移动客户端'],\n",
       " [1],\n",
       " ['媒体行业'],\n",
       " [1],\n",
       " ['移动端'],\n",
       " [1],\n",
       " ['开放式'],\n",
       " [1],\n",
       " ['产品'],\n",
       " [1],\n",
       " ['可视化报道'],\n",
       " [1],\n",
       " ['抗日剧'],\n",
       " [1],\n",
       " ['新路径'],\n",
       " [1],\n",
       " ['互联时代'],\n",
       " [1],\n",
       " ['《每日商报》'],\n",
       " [1],\n",
       " ['路径'],\n",
       " [1],\n",
       " ['采制主体'],\n",
       " [1],\n",
       " ['媒体机构'],\n",
       " [1],\n",
       " ['新闻舆论'],\n",
       " [1],\n",
       " ['媒介事件'],\n",
       " [1],\n",
       " ['广播'],\n",
       " [1],\n",
       " ['学术动态'],\n",
       " [1],\n",
       " ['城市广电'],\n",
       " [1],\n",
       " ['人民网'],\n",
       " [1]]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list_2014 = time_and_keyword(\"2014\")\n",
    "list_2015 = time_and_keyword(\"2015\")\n",
    "list_2016 = time_and_keyword(\"2016\")\n",
    "list_2017 = time_and_keyword(\"2017\")\n",
    "list_2018 = time_and_keyword(\"2018\")\n",
    "list_2019 = time_and_keyword(\"2019\")\n",
    "list_2020 = time_and_keyword(\"2020\")\n",
    "list_2021 = time_and_keyword(\"2021\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 修改"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 509,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "除错 0\n",
      "检查数量 374\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>序号</th>\n",
       "      <th>题名</th>\n",
       "      <th>(第一)作者/主编</th>\n",
       "      <th>来源</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>RT</th>\n",
       "      <th>SR</th>\n",
       "      <th>...</th>\n",
       "      <th>YR</th>\n",
       "      <th>IS</th>\n",
       "      <th>vo</th>\n",
       "      <th>OP</th>\n",
       "      <th>K1</th>\n",
       "      <th>AB</th>\n",
       "      <th>SN</th>\n",
       "      <th>CN</th>\n",
       "      <th>LA</th>\n",
       "      <th>DS</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>122</td>\n",
       "      <td>122</td>\n",
       "      <td>高校党建新媒体传播的理论、方法与策略</td>\n",
       "      <td>卢迪;邱子欣;</td>\n",
       "      <td>中国编辑</td>\n",
       "      <td>2019-06-10</td>\n",
       "      <td>7</td>\n",
       "      <td>656</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2019</td>\n",
       "      <td>06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20-26</td>\n",
       "      <td>高校;党建;新媒体</td>\n",
       "      <td>近年来,党中央对党建信息化提出了一系列新思想、新观点、新要求,为全面提高党建信息化水平指明了...</td>\n",
       "      <td>1671-9220</td>\n",
       "      <td>11-4795/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>314</td>\n",
       "      <td>314</td>\n",
       "      <td>颠覆与重构:新闻融合传播的策略与路径</td>\n",
       "      <td>金莉萍;</td>\n",
       "      <td>现代传播(中国传媒大学学报)</td>\n",
       "      <td>2016-01-15</td>\n",
       "      <td>11</td>\n",
       "      <td>927</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2016</td>\n",
       "      <td>01</td>\n",
       "      <td>38</td>\n",
       "      <td>158-159</td>\n",
       "      <td>新闻融合;策略与路径;互联网思维;内容为王;媒体融合;编辑部;策划设计;</td>\n",
       "      <td>在移动互联网时代,新技术、大数据改变了人们接触、参与信息传播的方式,引发了传播技术和信息传播...</td>\n",
       "      <td>1007-8770</td>\n",
       "      <td>11-5363/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>162</td>\n",
       "      <td>162</td>\n",
       "      <td>题材选择、数据处理与视觉表达——四川日报“MORE数据新闻频道”新闻作品分析</td>\n",
       "      <td>文铭权;李朗;</td>\n",
       "      <td>新闻界</td>\n",
       "      <td>2018-11-10</td>\n",
       "      <td>3</td>\n",
       "      <td>457</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2018</td>\n",
       "      <td>11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>91-94</td>\n",
       "      <td>媒体融合;数据新闻</td>\n",
       "      <td>在媒体融合发展战略中,数据新闻是一种重要的实现形式。四川日报\"MORE数据新闻频道\"在题材选...</td>\n",
       "      <td>1007-2438</td>\n",
       "      <td>51-1046/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>208</td>\n",
       "      <td>208</td>\n",
       "      <td>面向媒体融合的出版企业内容运营策略——以RAYS平台为例</td>\n",
       "      <td>白立华;刘永坚;施其明;</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2018-01-25</td>\n",
       "      <td>11</td>\n",
       "      <td>291</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2018</td>\n",
       "      <td>02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>68-70</td>\n",
       "      <td>出版企业;媒体融合;内容运营;RAYS平台</td>\n",
       "      <td>在媒体融合中,优质内容资源的合理运营是传统出版企业实现转型升级的基础。但当前许多传统出版企业...</td>\n",
       "      <td>1009-9263</td>\n",
       "      <td>11-4574/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>111</td>\n",
       "      <td>111</td>\n",
       "      <td>闯过深水区:媒体融合要抓住三大关键问题</td>\n",
       "      <td>徐世平;</td>\n",
       "      <td>新闻与写作</td>\n",
       "      <td>2019-08-05</td>\n",
       "      <td>0</td>\n",
       "      <td>151</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>86-89</td>\n",
       "      <td>媒体融合;信息革命;内容建设;资本;5G</td>\n",
       "      <td>传统媒体在体制机制、政策措施、流程管理、人才技术等方面加快融合步伐,建立融合传播矩阵、打造融...</td>\n",
       "      <td>1002-2295</td>\n",
       "      <td>11-1109/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>2015年传媒业猜想</td>\n",
       "      <td>郭全中;</td>\n",
       "      <td>青年记者</td>\n",
       "      <td>2015-01-10</td>\n",
       "      <td>2</td>\n",
       "      <td>197</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2015</td>\n",
       "      <td>01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11-12</td>\n",
       "      <td>传媒业;媒体融合;传统媒体广告;</td>\n",
       "      <td>互联网将成为影响2015年中国传媒业发展的关键因素,传媒业的很多方面都会打上互联网的烙印。2...</td>\n",
       "      <td>1002-2759</td>\n",
       "      <td>37-1003/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>353</td>\n",
       "      <td>353</td>\n",
       "      <td>2014年网络新媒体研究新触点及走向</td>\n",
       "      <td>孟威;</td>\n",
       "      <td>当代传播</td>\n",
       "      <td>2015-03-15</td>\n",
       "      <td>4</td>\n",
       "      <td>732</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2015</td>\n",
       "      <td>02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4-7+13</td>\n",
       "      <td>媒体融合;手机传播;“微”传播;大数据;新媒体</td>\n",
       "      <td>近两年,中国期刊网收录新媒体研究相关文献比起以往又有明显增加。文献在发表期刊和学科基础上虽有...</td>\n",
       "      <td>1009-5322</td>\n",
       "      <td>65-1201/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>371</th>\n",
       "      <td>361</td>\n",
       "      <td>361</td>\n",
       "      <td>2014年中国新媒体传播研究综述</td>\n",
       "      <td>付玉辉;</td>\n",
       "      <td>国际新闻界</td>\n",
       "      <td>2015-01-23</td>\n",
       "      <td>26</td>\n",
       "      <td>2993</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2015</td>\n",
       "      <td>01</td>\n",
       "      <td>37</td>\n",
       "      <td>35-46</td>\n",
       "      <td>新媒体;4G;媒体融合;互联网思维;产业互联网</td>\n",
       "      <td>本文对2014年中国新媒体传播研究的整体情况进行了梳理和归纳,认为这年的中国新媒体传播研究进...</td>\n",
       "      <td>1002-5685</td>\n",
       "      <td>11-1523/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>372</th>\n",
       "      <td>358</td>\n",
       "      <td>358</td>\n",
       "      <td>2014年中国广播发展图景</td>\n",
       "      <td>孟伟;</td>\n",
       "      <td>中国广播电视学刊</td>\n",
       "      <td>2015-03-01</td>\n",
       "      <td>8</td>\n",
       "      <td>357</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2015</td>\n",
       "      <td>03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14-21</td>\n",
       "      <td>广播;聚合;移动互联网;媒体融合;大数据</td>\n",
       "      <td>2014年是我国传统广播深入探索媒体融合的一年。从媒体形态转型、产业模式转型,到管理体制和机...</td>\n",
       "      <td>1002-8552</td>\n",
       "      <td>11-1746/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373</th>\n",
       "      <td>271</td>\n",
       "      <td>271</td>\n",
       "      <td>2004—2016我国传媒经济学的研究进展(上)</td>\n",
       "      <td>吴信训;储靖伦;</td>\n",
       "      <td>新闻与写作</td>\n",
       "      <td>2017-01-05</td>\n",
       "      <td>6</td>\n",
       "      <td>896</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2017</td>\n",
       "      <td>01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36-41</td>\n",
       "      <td>近10年;传媒经济;发展研究</td>\n",
       "      <td>近10年来,随着新媒体信息科技发展以及政治经济环境的改变,在一定意义上可以说,我国传媒经济发...</td>\n",
       "      <td>1002-2295</td>\n",
       "      <td>11-1109/G2</td>\n",
       "      <td>中文;</td>\n",
       "      <td>CNKI</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>374 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index   序号                                      题名     (第一)作者/主编  \\\n",
       "0      122  122                      高校党建新媒体传播的理论、方法与策略       卢迪;邱子欣;   \n",
       "1      314  314                      颠覆与重构:新闻融合传播的策略与路径          金莉萍;   \n",
       "2      162  162  题材选择、数据处理与视觉表达——四川日报“MORE数据新闻频道”新闻作品分析       文铭权;李朗;   \n",
       "3      208  208            面向媒体融合的出版企业内容运营策略——以RAYS平台为例  白立华;刘永坚;施其明;   \n",
       "4      111  111                     闯过深水区:媒体融合要抓住三大关键问题          徐世平;   \n",
       "..     ...  ...                                     ...           ...   \n",
       "369    363  363                              2015年传媒业猜想          郭全中;   \n",
       "370    353  353                      2014年网络新媒体研究新触点及走向           孟威;   \n",
       "371    361  361                        2014年中国新媒体传播研究综述          付玉辉;   \n",
       "372    358  358                           2014年中国广播发展图景           孟伟;   \n",
       "373    271  271                2004—2016我国传媒经济学的研究进展(上)      吴信训;储靖伦;   \n",
       "\n",
       "                 来源        发表时间  被引    下载               RT SR  ...    YR  IS  \\\n",
       "0              中国编辑  2019-06-10   7   656  Journal Article  1  ...  2019  06   \n",
       "1    现代传播(中国传媒大学学报)  2016-01-15  11   927  Journal Article  1  ...  2016  01   \n",
       "2               新闻界  2018-11-10   3   457  Journal Article  1  ...  2018  11   \n",
       "3                传媒  2018-01-25  11   291  Journal Article  1  ...  2018  02   \n",
       "4             新闻与写作  2019-08-05   0   151  Journal Article  1  ...  2019  08   \n",
       "..              ...         ...  ..   ...              ... ..  ...   ...  ..   \n",
       "369            青年记者  2015-01-10   2   197  Journal Article  1  ...  2015  01   \n",
       "370            当代传播  2015-03-15   4   732  Journal Article  1  ...  2015  02   \n",
       "371           国际新闻界  2015-01-23  26  2993  Journal Article  1  ...  2015  01   \n",
       "372        中国广播电视学刊  2015-03-01   8   357  Journal Article  1  ...  2015  03   \n",
       "373           新闻与写作  2017-01-05   6   896  Journal Article  1  ...  2017  01   \n",
       "\n",
       "      vo       OP                                    K1  \\\n",
       "0    NaN    20-26                             高校;党建;新媒体   \n",
       "1     38  158-159  新闻融合;策略与路径;互联网思维;内容为王;媒体融合;编辑部;策划设计;   \n",
       "2    NaN    91-94                             媒体融合;数据新闻   \n",
       "3    NaN    68-70                 出版企业;媒体融合;内容运营;RAYS平台   \n",
       "4    NaN    86-89                  媒体融合;信息革命;内容建设;资本;5G   \n",
       "..   ...      ...                                   ...   \n",
       "369  NaN    11-12                      传媒业;媒体融合;传统媒体广告;   \n",
       "370  NaN   4-7+13               媒体融合;手机传播;“微”传播;大数据;新媒体   \n",
       "371   37    35-46               新媒体;4G;媒体融合;互联网思维;产业互联网   \n",
       "372  NaN    14-21                  广播;聚合;移动互联网;媒体融合;大数据   \n",
       "373  NaN    36-41                        近10年;传媒经济;发展研究   \n",
       "\n",
       "                                                    AB         SN          CN  \\\n",
       "0    近年来,党中央对党建信息化提出了一系列新思想、新观点、新要求,为全面提高党建信息化水平指明了...  1671-9220  11-4795/G2   \n",
       "1    在移动互联网时代,新技术、大数据改变了人们接触、参与信息传播的方式,引发了传播技术和信息传播...  1007-8770  11-5363/G2   \n",
       "2    在媒体融合发展战略中,数据新闻是一种重要的实现形式。四川日报\"MORE数据新闻频道\"在题材选...  1007-2438  51-1046/G2   \n",
       "3    在媒体融合中,优质内容资源的合理运营是传统出版企业实现转型升级的基础。但当前许多传统出版企业...  1009-9263  11-4574/G2   \n",
       "4    传统媒体在体制机制、政策措施、流程管理、人才技术等方面加快融合步伐,建立融合传播矩阵、打造融...  1002-2295  11-1109/G2   \n",
       "..                                                 ...        ...         ...   \n",
       "369  互联网将成为影响2015年中国传媒业发展的关键因素,传媒业的很多方面都会打上互联网的烙印。2...  1002-2759  37-1003/G2   \n",
       "370  近两年,中国期刊网收录新媒体研究相关文献比起以往又有明显增加。文献在发表期刊和学科基础上虽有...  1009-5322  65-1201/G2   \n",
       "371  本文对2014年中国新媒体传播研究的整体情况进行了梳理和归纳,认为这年的中国新媒体传播研究进...  1002-5685  11-1523/G2   \n",
       "372  2014年是我国传统广播深入探索媒体融合的一年。从媒体形态转型、产业模式转型,到管理体制和机...  1002-8552  11-1746/G2   \n",
       "373  近10年来,随着新媒体信息科技发展以及政治经济环境的改变,在一定意义上可以说,我国传媒经济发...  1002-2295  11-1109/G2   \n",
       "\n",
       "      LA    DS  \n",
       "0    中文;  CNKI  \n",
       "1    中文;  CNKI  \n",
       "2    中文;  CNKI  \n",
       "3    中文;  CNKI  \n",
       "4    中文;  CNKI  \n",
       "..   ...   ...  \n",
       "369  中文;  CNKI  \n",
       "370  中文;  CNKI  \n",
       "371  中文;  CNKI  \n",
       "372  中文;  CNKI  \n",
       "373  中文;  CNKI  \n",
       "\n",
       "[374 rows x 24 columns]"
      ]
     },
     "execution_count": 509,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# def add_file():\n",
    "A = dfA.sort_values(by=['T1','JF','A1', \"YR\"], ascending=False)\n",
    "B = dfB.sort_values(by=['题名','来源','(第一)作者/主编', \"发表时间\"], ascending=False)\n",
    "dfC = B.reset_index().join(A.reset_index().drop('index', axis=1))\n",
    "dfC\n",
    "diff = dfC.query(\"T1!=题名 or JF!=来源\")\n",
    "print('除错',len(diff))\n",
    "print('检查数量',len(dfC))\n",
    "dfC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 510,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_to_keyword = dfC\n",
    "df_year =  dfC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 511,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_year.set_index(\"YR\", inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 512,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df_kw = df_year.loc[\"2015\"]\n",
    "\n",
    "# df_kw[\"K1\"].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 513,
   "metadata": {},
   "outputs": [],
   "source": [
    "# keyword_box = []\n",
    "a1 = [str(x).split(\";\") for x in df_kw[\"K1\"].values]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 514,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_kw = []\n",
    "for i in a1:\n",
    "    for j in i:\n",
    "        all_kw.append(j)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 515,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 去空值\n",
    "all_kw = [i for i in all_kw if i != '']\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 516,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = pd.value_counts(all_kw)\n",
    "kw_result = result.index.tolist()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 517,
   "metadata": {},
   "outputs": [],
   "source": [
    "kw_type = []\n",
    "for i in result.index:\n",
    "    kw_type.append(i)\n",
    "kw_accout= result.tolist()\n",
    "# kw_accout[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 518,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "list1 = []\n",
    "list2 = []\n",
    "for i in range(0,len(kw_accout)):\n",
    "    j1 = kw_result[i]\n",
    "    j2 = kw_accout[i]\n",
    "    list1.append(j1) \n",
    "    list1.append(j2)\n",
    "    list2.append(list1[i:i+2])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 519,
   "metadata": {},
   "outputs": [],
   "source": [
    "list3 = []\n",
    "for i in range(len(list2)):\n",
    "        if i %2 !=1:     #i为奇数\n",
    "            list3.append(list2[i])     #list[i]添加到lst1列表中\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 247,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2014 = list3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 520,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2015 = list3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 506,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2016 = list3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 455,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2017 = list3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 442,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['媒体融合', 31],\n",
       " ['人工智能', 14],\n",
       " ['媒体融合发展', 9],\n",
       " ['大数据', 4],\n",
       " ['人工智能技术', 3],\n",
       " ['智能化媒体', 3],\n",
       " ['报业集团', 3],\n",
       " ['中央厨房', 3],\n",
       " ['新媒体', 3],\n",
       " ['传统媒体', 3],\n",
       " ['趋势', 3],\n",
       " ['媒体深度融合', 2],\n",
       " ['互联网治理', 2],\n",
       " ['云计算技术', 2],\n",
       " ['传播效果', 2],\n",
       " ['舆论引导', 2],\n",
       " ['智能化', 2],\n",
       " ['移动优先', 2],\n",
       " ['传媒业', 2],\n",
       " ['广电媒体', 2],\n",
       " ['移动端', 2],\n",
       " ['国家新战略', 2],\n",
       " ['盈利模式', 2],\n",
       " ['采编流程', 2],\n",
       " ['数字中国', 2],\n",
       " ['新生态', 2],\n",
       " ['媒介融合', 2],\n",
       " ['报业转型', 2],\n",
       " ['数字化转型', 2],\n",
       " ['全国两会', 2],\n",
       " ['媒体大脑', 2],\n",
       " ['研究热点', 1],\n",
       " ['大数据技术', 1],\n",
       " ['传播形态', 1],\n",
       " ['用户数据', 1],\n",
       " ['浙报集团', 1],\n",
       " ['北京时间', 1],\n",
       " ['绩效考核', 1],\n",
       " ['山东省', 1],\n",
       " ['发行量', 1],\n",
       " ['智能传播', 1],\n",
       " ['数字内容', 1],\n",
       " ['澳大利亚', 1],\n",
       " ['融合传播', 1],\n",
       " ['习近平总书记', 1],\n",
       " ['综合服务能力', 1],\n",
       " ['《星岛日报》', 1],\n",
       " ['方式创新', 1],\n",
       " ['机器写作', 1],\n",
       " ['技术逻辑', 1],\n",
       " ['新媒体融合', 1],\n",
       " ['新媒体技术', 1],\n",
       " ['地方纸媒', 1],\n",
       " ['数字版权保护技术', 1],\n",
       " ['期刊', 1],\n",
       " ['媒立方', 1],\n",
       " ['5G', 1],\n",
       " ['创新驱动', 1],\n",
       " ['内容运营', 1],\n",
       " ['广告市场规模', 1],\n",
       " ['全版权', 1],\n",
       " ['《春城晚报》', 1],\n",
       " ['文化自信', 1],\n",
       " ['融合发展', 1],\n",
       " ['事后管理', 1],\n",
       " ['运营模式', 1],\n",
       " ['华数集团', 1],\n",
       " ['党管数据', 1],\n",
       " ['指挥调度系统', 1],\n",
       " ['传播方式', 1],\n",
       " ['新网络+应用', 1],\n",
       " ['数字化趋势', 1],\n",
       " ['新闻教育', 1],\n",
       " ['《廊坊日报》', 1],\n",
       " ['新技术', 1],\n",
       " ['高校校园媒体', 1],\n",
       " ['技术服务商', 1],\n",
       " ['出版企业', 1],\n",
       " ['传统主流媒体', 1],\n",
       " ['副总编辑', 1],\n",
       " ['创新现状', 1],\n",
       " ['新闻舆论工作', 1],\n",
       " ['大数据挖掘', 1],\n",
       " ['广电新媒体', 1],\n",
       " ['技术解决方案', 1],\n",
       " ['新闻报道', 1],\n",
       " ['新闻资讯', 1],\n",
       " ['自媒体', 1],\n",
       " ['生态级媒体平台', 1],\n",
       " ['广播电视', 1],\n",
       " ['融合评估', 1],\n",
       " ['模式再造', 1],\n",
       " ['传统电视', 1],\n",
       " ['数字出版', 1],\n",
       " ['盈利模式创新', 1],\n",
       " ['中国报协', 1],\n",
       " ['平台化', 1],\n",
       " ['数据化', 1],\n",
       " ['重构', 1],\n",
       " ['新闻传播领域', 1],\n",
       " ['综述', 1],\n",
       " ['新闻管理', 1],\n",
       " ['研究方向', 1],\n",
       " ['实践与思考', 1],\n",
       " ['“互联网+”', 1],\n",
       " ['数据引擎', 1],\n",
       " ['激励机制', 1],\n",
       " ['热点', 1]]"
      ]
     },
     "execution_count": 442,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list_2018 = list3\n",
    "list_2018"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 467,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2019 = list3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 479,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2020 = list3\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 491,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_2021 = list3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### pyecharts "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 521,
   "metadata": {},
   "outputs": [],
   "source": [
    "ALL = {\n",
    "    2014:list_2014,\n",
    "    2015:list_2015,\n",
    "    2016:list_2016,\n",
    "    2017:list_2017,\n",
    "    2018:list_2018,\n",
    "    2019:list_2019,\n",
    "    2020:list_2020,\n",
    "    2021:list_2021,\n",
    "#     2017:[[\"choose\",\"values\"],[],[],[],[]]\n",
    "}\n",
    "# ALL[2015]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 673,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['媒体融合', 7], ['大数据', 2], ['其他', 19]]"
      ]
     },
     "execution_count": 673,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "other = []\n",
    "goal_2014 = []\n",
    "for i in range(0,len(list_2014)):\n",
    "    if 1 in list_2014[i]:\n",
    "        other.append(list_2014[i])\n",
    "    else:\n",
    "        goal_2014.append(list_2014[i])\n",
    "other_goal = [\"其他\",len(other)]\n",
    "goal_2014.append(other_goal)\n",
    "goal_2014"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 668,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['媒体融合', 31],\n",
       " ['大数据', 13],\n",
       " ['媒体融合发展', 7],\n",
       " ['新媒体', 6],\n",
       " ['传统媒体', 5],\n",
       " ['互联网思维', 5],\n",
       " ['大数据时代', 4],\n",
       " ['新型主流媒体', 3],\n",
       " ['数字出版', 3],\n",
       " ['其他', 73]]"
      ]
     },
     "execution_count": 668,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "other = []\n",
    "goal_2015 = []\n",
    "for i in range(0,len(list_2015)):\n",
    "    if 1 in list_2015[i] or 2 in list_2015[i]:\n",
    "        other.append(list_2015[i])\n",
    "    else:\n",
    "        goal_2015.append(list_2015[i])\n",
    "other_goal = [\"其他\",len(other)]\n",
    "goal_2015.append(other_goal)\n",
    "goal_2015"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 674,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['媒体融合', 28],\n",
       " ['传统媒体', 7],\n",
       " ['大数据', 5],\n",
       " ['互联网思维', 4],\n",
       " ['媒体融合发展', 4],\n",
       " ['全媒体平台', 3],\n",
       " ['人民日报', 3],\n",
       " ['其他', 81]]"
      ]
     },
     "execution_count": 674,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "other = []\n",
    "goal_2016 = []\n",
    "for i in range(0,len(list_2016)):\n",
    "    if 1 in list_2016[i] or 2 in list_2016[i]:\n",
    "        other.append(list_2016[i])\n",
    "    else:\n",
    "        goal_2016.append(list_2016[i])\n",
    "other_goal = [\"其他\",len(other)]\n",
    "goal_2016.append(other_goal)\n",
    "goal_2016"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 675,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['媒体融合', 34],\n",
       " ['大数据', 9],\n",
       " ['传统媒体', 6],\n",
       " ['人工智能', 5],\n",
       " ['媒体融合发展', 5],\n",
       " ['新媒体', 5],\n",
       " ['新闻客户端', 5],\n",
       " ['媒体深度融合', 4],\n",
       " ['新兴媒体', 4],\n",
       " ['移动直播', 3],\n",
       " ['主流媒体', 3],\n",
       " ['中国传媒', 3],\n",
       " ['报业集团', 3],\n",
       " ['其他', 96]]"
      ]
     },
     "execution_count": 675,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "other = []\n",
    "goal_2017 = []\n",
    "for i in range(0,len(list_2017)):\n",
    "    if 1 in list_2017[i] or 2 in list_2017[i]:\n",
    "        other.append(list_2017[i])\n",
    "    else:\n",
    "        goal_2017.append(list_2017[i])\n",
    "other_goal = [\"其他\",len(other)]\n",
    "goal_2017.append(other_goal)\n",
    "goal_2017"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 677,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['媒体融合', 31],\n",
       " ['人工智能', 14],\n",
       " ['媒体融合发展', 9],\n",
       " ['大数据', 4],\n",
       " ['人工智能技术', 3],\n",
       " ['智能化媒体', 3],\n",
       " ['报业集团', 3],\n",
       " ['中央厨房', 3],\n",
       " ['新媒体', 3],\n",
       " ['传统媒体', 3],\n",
       " ['趋势', 3],\n",
       " ['其他', 97]]"
      ]
     },
     "execution_count": 677,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "other = []\n",
    "goal_2018 = []\n",
    "for i in range(0,len(list_2018)):\n",
    "    if 1 in list_2018[i] or 2 in list_2018[i]:\n",
    "        other.append(list_2018[i])\n",
    "    else:\n",
    "        goal_2018.append(list_2018[i])\n",
    "other_goal = [\"其他\",len(other)]\n",
    "goal_2018.append(other_goal)\n",
    "goal_2018"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 678,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['媒体融合', 47],\n",
       " ['人工智能', 21],\n",
       " ['新华社', 5],\n",
       " ['智能媒体', 4],\n",
       " ['传统媒体', 4],\n",
       " ['媒体融合发展', 4],\n",
       " ['县级融媒体', 4],\n",
       " ['5G', 4],\n",
       " ['人工智能技术', 4],\n",
       " ['全媒体', 4],\n",
       " ['全国两会', 4],\n",
       " ['新媒体', 4],\n",
       " ['短视频', 3],\n",
       " ['两会报道', 3],\n",
       " ['大数据', 3],\n",
       " ['智慧广电', 3],\n",
       " ['其他', 109]]"
      ]
     },
     "execution_count": 678,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "other = []\n",
    "goal_2019 = []\n",
    "for i in range(0,len(list_2019)):\n",
    "    if 1 in list_2019[i] or 2 in list_2019[i]:\n",
    "        other.append(list_2019[i])\n",
    "    else:\n",
    "        goal_2019.append(list_2019[i])\n",
    "other_goal = [\"其他\",len(other)]\n",
    "goal_2019.append(other_goal)\n",
    "goal_2019"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 679,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['媒体融合', 35],\n",
       " ['人工智能', 13],\n",
       " ['5G', 9],\n",
       " ['媒体融合发展', 8],\n",
       " ['短视频', 7],\n",
       " ['传统媒体', 4],\n",
       " ['传媒业', 4],\n",
       " ['智能媒体', 4],\n",
       " ['全媒体', 3],\n",
       " ['媒体深度融合', 3],\n",
       " ['内容生产', 3],\n",
       " ['疫情防控', 3],\n",
       " ['融媒体', 3],\n",
       " ['其他', 86]]"
      ]
     },
     "execution_count": 679,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "other = []\n",
    "goal_2020 = []\n",
    "for i in range(0,len(list_2020)):\n",
    "    if 1 in list_2020[i] or 2 in list_2020[i]:\n",
    "        other.append(list_2020[i])\n",
    "    else:\n",
    "        goal_2020.append(list_2020[i])\n",
    "other_goal = [\"其他\",len(other)]\n",
    "goal_2020.append(other_goal)\n",
    "goal_2020"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 680,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['媒体融合', 14], ['人工智能', 7], ['大数据', 3], ['其他', 48]]"
      ]
     },
     "execution_count": 680,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "other = []\n",
    "goal_2021 = []\n",
    "for i in range(0,len(list_2021)):\n",
    "    if 1 in list_2021[i] or 2 in list_2021[i]:\n",
    "        other.append(list_2021[i])\n",
    "    else:\n",
    "        goal_2021.append(list_2021[i])\n",
    "other_goal = [\"其他\",len(other)]\n",
    "goal_2021.append(other_goal)\n",
    "goal_2021"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 681,
   "metadata": {},
   "outputs": [],
   "source": [
    "ALL = {\n",
    "    2014:goal_2014,\n",
    "    2015:goal_2015,\n",
    "    2016:goal_2016,\n",
    "    2017:goal_2017,\n",
    "    2018:goal_2018,\n",
    "    2019:goal_2019,\n",
    "    2020:goal_2020,\n",
    "    2021:goal_2021,\n",
    "#     2017:[[\"choose\",\"values\"],[],[],[],[]]\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 685,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\zjl\\\\Desktop\\\\Python数据分析\\\\期末项目\\\\AI_BD_媒介融合\\\\timeline_pie.html'"
      ]
     },
     "execution_count": 685,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Pie, Timeline\n",
    "from pyecharts.faker import Faker\n",
    "\n",
    "# attr = Faker.choose()\n",
    "tl = Timeline()\n",
    "\n",
    "for i in range(2014,2022):\n",
    "    pie = (\n",
    "        Pie()\n",
    "        .add(\n",
    "            \"关键词\",\n",
    "            ALL[i],\n",
    "            rosetype=\"radius\",\n",
    "            radius=[\"30%\", \"55%\"],\n",
    "            label_opts=opts.LabelOpts(is_show=False, position=\"center\"),\n",
    "        )\n",
    "        .set_global_opts(title_opts=opts.TitleOpts(title=\"关键字时序趋势分析\",pos_left=\"center\",pos_top=\"10\"),legend_opts=opts.LegendOpts(type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"))\n",
    "        .set_series_opts(label_opts=opts.LabelOpts(formatter=\"{b}: {c}\"))\n",
    "    )\n",
    "    tl.add(pie, \"{}年\".format(i))\n",
    "tl.render(\"timeline_pie.html\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 253,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_to_keyword['KW1'] = df_to_keyword['K1'].str.split(';').str[0]\n",
    "df_to_keyword['KW2'] = df_to_keyword['K1'].str.split(';').str[1]\n",
    "df_to_keyword['KW3'] = df_to_keyword['K1'].str.split(';').str[2]\n",
    "df_to_keyword['KW4'] = df_to_keyword['K1'].str.split(';').str[3]\n",
    "df_to_keyword['KW5'] = df_to_keyword['K1'].str.split(';').str[4]\n",
    "df_to_keyword['KW6'] = df_to_keyword['K1'].str.split(';').str[5]\n",
    "df_to_keyword['KW7'] = df_to_keyword['K1'].str.split(';').str[6]\n",
    "df_to_keyword['KW8'] = df_to_keyword['K1'].str.split(';').str[7]\n",
    "df_to_keyword['KW9'] = df_to_keyword['K1'].str.split(';').str[8]\n",
    "df_to_keyword['KW10'] = df_to_keyword['K1'].str.split(';').str[9]\n",
    "df_to_keyword['KW11'] = df_to_keyword['K1'].str.split(';').str[10]\n",
    "df_to_keyword['KW12'] = df_to_keyword['K1'].str.split(';').str[11]\n",
    "df_to_keyword['KW13'] = df_to_keyword['K1'].str.split(';').str[12]\n",
    "df_to_keyword['KW14'] = df_to_keyword['K1'].str.split(';').str[13]\n",
    "df_to_keyword['KW15'] = df_to_keyword['K1'].str.split(';').str[14]\n",
    "df_to_keyword['KW16'] = df_to_keyword['K1'].str.split(';').str[15]\n",
    "df_to_keyword['KW17'] = df_to_keyword['K1'].str.split(';').str[16]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 254,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>序号</th>\n",
       "      <th>题名</th>\n",
       "      <th>(第一)作者/主编</th>\n",
       "      <th>来源</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>RT</th>\n",
       "      <th>SR</th>\n",
       "      <th>...</th>\n",
       "      <th>KW8</th>\n",
       "      <th>KW9</th>\n",
       "      <th>KW10</th>\n",
       "      <th>KW11</th>\n",
       "      <th>KW12</th>\n",
       "      <th>KW13</th>\n",
       "      <th>KW14</th>\n",
       "      <th>KW15</th>\n",
       "      <th>KW16</th>\n",
       "      <th>KW17</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>122</td>\n",
       "      <td>122</td>\n",
       "      <td>高校党建新媒体传播的理论、方法与策略</td>\n",
       "      <td>卢迪;邱子欣;</td>\n",
       "      <td>中国编辑</td>\n",
       "      <td>2019-06-10</td>\n",
       "      <td>7</td>\n",
       "      <td>656</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>314</td>\n",
       "      <td>314</td>\n",
       "      <td>颠覆与重构:新闻融合传播的策略与路径</td>\n",
       "      <td>金莉萍;</td>\n",
       "      <td>现代传播(中国传媒大学学报)</td>\n",
       "      <td>2016-01-15</td>\n",
       "      <td>11</td>\n",
       "      <td>927</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>162</td>\n",
       "      <td>162</td>\n",
       "      <td>题材选择、数据处理与视觉表达——四川日报“MORE数据新闻频道”新闻作品分析</td>\n",
       "      <td>文铭权;李朗;</td>\n",
       "      <td>新闻界</td>\n",
       "      <td>2018-11-10</td>\n",
       "      <td>3</td>\n",
       "      <td>457</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>208</td>\n",
       "      <td>208</td>\n",
       "      <td>面向媒体融合的出版企业内容运营策略——以RAYS平台为例</td>\n",
       "      <td>白立华;刘永坚;施其明;</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2018-01-25</td>\n",
       "      <td>11</td>\n",
       "      <td>291</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>111</td>\n",
       "      <td>111</td>\n",
       "      <td>闯过深水区:媒体融合要抓住三大关键问题</td>\n",
       "      <td>徐世平;</td>\n",
       "      <td>新闻与写作</td>\n",
       "      <td>2019-08-05</td>\n",
       "      <td>0</td>\n",
       "      <td>151</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>2015年传媒业猜想</td>\n",
       "      <td>郭全中;</td>\n",
       "      <td>青年记者</td>\n",
       "      <td>2015-01-10</td>\n",
       "      <td>2</td>\n",
       "      <td>197</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>353</td>\n",
       "      <td>353</td>\n",
       "      <td>2014年网络新媒体研究新触点及走向</td>\n",
       "      <td>孟威;</td>\n",
       "      <td>当代传播</td>\n",
       "      <td>2015-03-15</td>\n",
       "      <td>4</td>\n",
       "      <td>732</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>371</th>\n",
       "      <td>361</td>\n",
       "      <td>361</td>\n",
       "      <td>2014年中国新媒体传播研究综述</td>\n",
       "      <td>付玉辉;</td>\n",
       "      <td>国际新闻界</td>\n",
       "      <td>2015-01-23</td>\n",
       "      <td>26</td>\n",
       "      <td>2993</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>372</th>\n",
       "      <td>358</td>\n",
       "      <td>358</td>\n",
       "      <td>2014年中国广播发展图景</td>\n",
       "      <td>孟伟;</td>\n",
       "      <td>中国广播电视学刊</td>\n",
       "      <td>2015-03-01</td>\n",
       "      <td>8</td>\n",
       "      <td>357</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373</th>\n",
       "      <td>271</td>\n",
       "      <td>271</td>\n",
       "      <td>2004—2016我国传媒经济学的研究进展(上)</td>\n",
       "      <td>吴信训;储靖伦;</td>\n",
       "      <td>新闻与写作</td>\n",
       "      <td>2017-01-05</td>\n",
       "      <td>6</td>\n",
       "      <td>896</td>\n",
       "      <td>Journal Article</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>374 rows × 42 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index   序号                                      题名     (第一)作者/主编  \\\n",
       "0      122  122                      高校党建新媒体传播的理论、方法与策略       卢迪;邱子欣;   \n",
       "1      314  314                      颠覆与重构:新闻融合传播的策略与路径          金莉萍;   \n",
       "2      162  162  题材选择、数据处理与视觉表达——四川日报“MORE数据新闻频道”新闻作品分析       文铭权;李朗;   \n",
       "3      208  208            面向媒体融合的出版企业内容运营策略——以RAYS平台为例  白立华;刘永坚;施其明;   \n",
       "4      111  111                     闯过深水区:媒体融合要抓住三大关键问题          徐世平;   \n",
       "..     ...  ...                                     ...           ...   \n",
       "369    363  363                              2015年传媒业猜想          郭全中;   \n",
       "370    353  353                      2014年网络新媒体研究新触点及走向           孟威;   \n",
       "371    361  361                        2014年中国新媒体传播研究综述          付玉辉;   \n",
       "372    358  358                           2014年中国广播发展图景           孟伟;   \n",
       "373    271  271                2004—2016我国传媒经济学的研究进展(上)      吴信训;储靖伦;   \n",
       "\n",
       "                 来源        发表时间  被引    下载               RT SR  ...  KW8  KW9  \\\n",
       "0              中国编辑  2019-06-10   7   656  Journal Article  1  ...  NaN  NaN   \n",
       "1    现代传播(中国传媒大学学报)  2016-01-15  11   927  Journal Article  1  ...       NaN   \n",
       "2               新闻界  2018-11-10   3   457  Journal Article  1  ...  NaN  NaN   \n",
       "3                传媒  2018-01-25  11   291  Journal Article  1  ...  NaN  NaN   \n",
       "4             新闻与写作  2019-08-05   0   151  Journal Article  1  ...  NaN  NaN   \n",
       "..              ...         ...  ..   ...              ... ..  ...  ...  ...   \n",
       "369            青年记者  2015-01-10   2   197  Journal Article  1  ...  NaN  NaN   \n",
       "370            当代传播  2015-03-15   4   732  Journal Article  1  ...  NaN  NaN   \n",
       "371           国际新闻界  2015-01-23  26  2993  Journal Article  1  ...  NaN  NaN   \n",
       "372        中国广播电视学刊  2015-03-01   8   357  Journal Article  1  ...  NaN  NaN   \n",
       "373           新闻与写作  2017-01-05   6   896  Journal Article  1  ...  NaN  NaN   \n",
       "\n",
       "    KW10 KW11 KW12 KW13 KW14 KW15 KW16 KW17  \n",
       "0    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "1    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "2    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "3    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "4    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "..   ...  ...  ...  ...  ...  ...  ...  ...  \n",
       "369  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "370  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "371  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "372  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "373  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "\n",
       "[374 rows x 42 columns]"
      ]
     },
     "execution_count": 254,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_to_keyword"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 280,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>YR</th>\n",
       "      <th>KW1</th>\n",
       "      <th>KW2</th>\n",
       "      <th>KW3</th>\n",
       "      <th>KW4</th>\n",
       "      <th>KW5</th>\n",
       "      <th>KW6</th>\n",
       "      <th>KW7</th>\n",
       "      <th>KW8</th>\n",
       "      <th>KW9</th>\n",
       "      <th>KW10</th>\n",
       "      <th>KW11</th>\n",
       "      <th>KW12</th>\n",
       "      <th>KW13</th>\n",
       "      <th>KW14</th>\n",
       "      <th>KW15</th>\n",
       "      <th>KW16</th>\n",
       "      <th>KW17</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2019</td>\n",
       "      <td>高校</td>\n",
       "      <td>党建</td>\n",
       "      <td>新媒体</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016</td>\n",
       "      <td>新闻融合</td>\n",
       "      <td>策略与路径</td>\n",
       "      <td>互联网思维</td>\n",
       "      <td>内容为王</td>\n",
       "      <td>媒体融合</td>\n",
       "      <td>编辑部</td>\n",
       "      <td>策划设计</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2018</td>\n",
       "      <td>媒体融合</td>\n",
       "      <td>数据新闻</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2018</td>\n",
       "      <td>出版企业</td>\n",
       "      <td>媒体融合</td>\n",
       "      <td>内容运营</td>\n",
       "      <td>RAYS平台</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2019</td>\n",
       "      <td>媒体融合</td>\n",
       "      <td>信息革命</td>\n",
       "      <td>内容建设</td>\n",
       "      <td>资本</td>\n",
       "      <td>5G</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>2015</td>\n",
       "      <td>传媒业</td>\n",
       "      <td>媒体融合</td>\n",
       "      <td>传统媒体广告</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>2015</td>\n",
       "      <td>媒体融合</td>\n",
       "      <td>手机传播</td>\n",
       "      <td>“微”传播</td>\n",
       "      <td>大数据</td>\n",
       "      <td>新媒体</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>371</th>\n",
       "      <td>2015</td>\n",
       "      <td>新媒体</td>\n",
       "      <td>4G</td>\n",
       "      <td>媒体融合</td>\n",
       "      <td>互联网思维</td>\n",
       "      <td>产业互联网</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>372</th>\n",
       "      <td>2015</td>\n",
       "      <td>广播</td>\n",
       "      <td>聚合</td>\n",
       "      <td>移动互联网</td>\n",
       "      <td>媒体融合</td>\n",
       "      <td>大数据</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373</th>\n",
       "      <td>2017</td>\n",
       "      <td>近10年</td>\n",
       "      <td>传媒经济</td>\n",
       "      <td>发展研究</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>374 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       YR   KW1    KW2     KW3     KW4    KW5  KW6   KW7  KW8  KW9 KW10 KW11  \\\n",
       "0    2019    高校     党建     新媒体     NaN    NaN  NaN   NaN  NaN  NaN  NaN  NaN   \n",
       "1    2016  新闻融合  策略与路径   互联网思维    内容为王   媒体融合  编辑部  策划设计       NaN  NaN  NaN   \n",
       "2    2018  媒体融合   数据新闻     NaN     NaN    NaN  NaN   NaN  NaN  NaN  NaN  NaN   \n",
       "3    2018  出版企业   媒体融合    内容运营  RAYS平台    NaN  NaN   NaN  NaN  NaN  NaN  NaN   \n",
       "4    2019  媒体融合   信息革命    内容建设      资本     5G  NaN   NaN  NaN  NaN  NaN  NaN   \n",
       "..    ...   ...    ...     ...     ...    ...  ...   ...  ...  ...  ...  ...   \n",
       "369  2015   传媒业   媒体融合  传统媒体广告            NaN  NaN   NaN  NaN  NaN  NaN  NaN   \n",
       "370  2015  媒体融合   手机传播   “微”传播     大数据    新媒体  NaN   NaN  NaN  NaN  NaN  NaN   \n",
       "371  2015   新媒体     4G    媒体融合   互联网思维  产业互联网  NaN   NaN  NaN  NaN  NaN  NaN   \n",
       "372  2015    广播     聚合   移动互联网    媒体融合    大数据  NaN   NaN  NaN  NaN  NaN  NaN   \n",
       "373  2017  近10年   传媒经济    发展研究     NaN    NaN  NaN   NaN  NaN  NaN  NaN  NaN   \n",
       "\n",
       "    KW12 KW13 KW14 KW15 KW16 KW17  \n",
       "0    NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "1    NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "2    NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "3    NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "4    NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "..   ...  ...  ...  ...  ...  ...  \n",
       "369  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "370  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "371  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "372  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "373  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "\n",
       "[374 rows x 18 columns]"
      ]
     },
     "execution_count": 280,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_YRandKW = df_to_keyword[[\"YR\",\"KW1\",\"KW2\",\"KW3\",\"KW4\",\"KW5\",\"KW6\",\"KW7\",\"KW8\",\"KW9\",\"KW10\",\"KW11\",\"KW12\",\"KW13\",\"KW14\",\"KW15\",\"KW16\",\"KW17\"]]\n",
    "data_YRandKW"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 281,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_YRandKW.set_index(\"YR\", inplace=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 282,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>KW1</th>\n",
       "      <th>KW2</th>\n",
       "      <th>KW3</th>\n",
       "      <th>KW4</th>\n",
       "      <th>KW5</th>\n",
       "      <th>KW6</th>\n",
       "      <th>KW7</th>\n",
       "      <th>KW8</th>\n",
       "      <th>KW9</th>\n",
       "      <th>KW10</th>\n",
       "      <th>KW11</th>\n",
       "      <th>KW12</th>\n",
       "      <th>KW13</th>\n",
       "      <th>KW14</th>\n",
       "      <th>KW15</th>\n",
       "      <th>KW16</th>\n",
       "      <th>KW17</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YR</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2019</th>\n",
       "      <td>高校</td>\n",
       "      <td>党建</td>\n",
       "      <td>新媒体</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019</th>\n",
       "      <td>媒体融合</td>\n",
       "      <td>信息革命</td>\n",
       "      <td>内容建设</td>\n",
       "      <td>资本</td>\n",
       "      <td>5G</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019</th>\n",
       "      <td>数字出版产业</td>\n",
       "      <td>中国数字出版</td>\n",
       "      <td>出版业转型</td>\n",
       "      <td>数字内容产业</td>\n",
       "      <td>短视频平台</td>\n",
       "      <td>移动出版</td>\n",
       "      <td>网络文学</td>\n",
       "      <td>媒体融合发展</td>\n",
       "      <td>年度报告</td>\n",
       "      <td>人工智能技术</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019</th>\n",
       "      <td>媒体融合发展</td>\n",
       "      <td>优化创新</td>\n",
       "      <td>社交平台</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019</th>\n",
       "      <td>《当代贵州》</td>\n",
       "      <td>新时期</td>\n",
       "      <td>党刊</td>\n",
       "      <td>融媒体发展</td>\n",
       "      <td>策略</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019</th>\n",
       "      <td>县级融媒体</td>\n",
       "      <td>邳州模式</td>\n",
       "      <td>党管媒体</td>\n",
       "      <td>社会效益</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019</th>\n",
       "      <td>发展前瞻</td>\n",
       "      <td>积极应对</td>\n",
       "      <td>全面建成小康社会</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019</th>\n",
       "      <td>媒体融合</td>\n",
       "      <td>全国两会</td>\n",
       "      <td>时政报道</td>\n",
       "      <td>创新</td>\n",
       "      <td>Vlog</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019</th>\n",
       "      <td>新媒体</td>\n",
       "      <td>网络</td>\n",
       "      <td>“两微一端”、媒体融合</td>\n",
       "      <td>可视化、短视频、学生青年、舆论</td>\n",
       "      <td>自媒体</td>\n",
       "      <td>网络安全</td>\n",
       "      <td>治理</td>\n",
       "      <td>5G、VR、算法、人工智能</td>\n",
       "      <td>理论建构</td>\n",
       "      <td>新技术</td>\n",
       "      <td>热点</td>\n",
       "      <td>新意</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019</th>\n",
       "      <td>媒体融合</td>\n",
       "      <td>突破</td>\n",
       "      <td>平台建设</td>\n",
       "      <td>县级融媒体</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>72 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         KW1     KW2          KW3              KW4    KW5   KW6   KW7  \\\n",
       "YR                                                                      \n",
       "2019      高校      党建          新媒体              NaN    NaN   NaN   NaN   \n",
       "2019    媒体融合    信息革命         内容建设               资本     5G   NaN   NaN   \n",
       "2019  数字出版产业  中国数字出版        出版业转型           数字内容产业  短视频平台  移动出版  网络文学   \n",
       "2019  媒体融合发展    优化创新         社交平台                     NaN   NaN   NaN   \n",
       "2019  《当代贵州》     新时期           党刊            融媒体发展     策略   NaN   NaN   \n",
       "...      ...     ...          ...              ...    ...   ...   ...   \n",
       "2019   县级融媒体    邳州模式         党管媒体             社会效益    NaN   NaN   NaN   \n",
       "2019    发展前瞻    积极应对     全面建成小康社会                     NaN   NaN   NaN   \n",
       "2019    媒体融合    全国两会         时政报道               创新   Vlog   NaN   NaN   \n",
       "2019     新媒体      网络  “两微一端”、媒体融合  可视化、短视频、学生青年、舆论    自媒体  网络安全    治理   \n",
       "2019    媒体融合      突破         平台建设            县级融媒体    NaN   NaN   NaN   \n",
       "\n",
       "                KW8   KW9    KW10 KW11 KW12 KW13 KW14 KW15 KW16 KW17  \n",
       "YR                                                                    \n",
       "2019            NaN   NaN     NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "2019            NaN   NaN     NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "2019         媒体融合发展  年度报告  人工智能技术       NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "2019            NaN   NaN     NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "2019            NaN   NaN     NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "...             ...   ...     ...  ...  ...  ...  ...  ...  ...  ...  \n",
       "2019            NaN   NaN     NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "2019            NaN   NaN     NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "2019            NaN   NaN     NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "2019  5G、VR、算法、人工智能  理论建构     新技术   热点   新意  NaN  NaN  NaN  NaN  NaN  \n",
       "2019            NaN   NaN     NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  \n",
       "\n",
       "[72 rows x 17 columns]"
      ]
     },
     "execution_count": 282,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_YRandKW.loc['2019']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 期刊分析——饼状图\n",
    "* https://gallery.pyecharts.org/#/Dataset/dataset_pie"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 文献数+期刊"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 527,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_to_来源= dfC\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 528,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# df_source = df_to_来源.groupby('来源').sum().sort_values\n",
    "df_source = df_to_来源[\"来源\"].value_counts()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 529,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = df_source.index\n",
    "type_source = a.tolist()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 530,
   "metadata": {},
   "outputs": [],
   "source": [
    "count_source = df_source.tolist()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### pyecharts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 531,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\zjl\\\\Desktop\\\\Python数据分析\\\\期末项目\\\\AI_BD_媒介融合\\\\pie.html'"
      ]
     },
     "execution_count": 531,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_to_source = data\n",
    "df_source  = df_to_source[\"来源\"].value_counts()\n",
    "a = df_source.index\n",
    "type_source = a.tolist()\n",
    "count_source = df_source.tolist()\n",
    "\n",
    "source_rate=[]\n",
    "for i in range(0,len(count_source)):\n",
    "    j = str(count_source[i]/374)+'%'\n",
    "    source_rate.append(j)\n",
    "\n",
    "\n",
    "#-------------------------------------------------------\n",
    "def Pie_base() :\n",
    "    p = (\n",
    "        Pie()\n",
    "        .add(\"\", [list(z) for z in zip(type_source, count_source)],center=['50%', '50%'],radius=\"55%\",label_opts=opts.LabelOpts(is_show=False, position=\"center\"),)\n",
    "        .set_global_opts(title_opts=opts.TitleOpts(title=\"文献来源期刊分布\",pos_left=\"center\",pos_top=\"20\"),legend_opts=opts.LegendOpts(type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"))\n",
    "\n",
    "        # .set_series_opts(label_opts=opts.LabelOpts(formatter=\"{b}: {c}\"))\n",
    "        .set_series_opts(\n",
    "        tooltip_opts=opts.TooltipOpts(\n",
    "            trigger=\"item\", formatter=\"{b}: {c} ({d}%)\"\n",
    "        ))\n",
    "        .render(\"pie.html\")\n",
    "\n",
    "        \n",
    "        )\n",
    "\n",
    "    return p\n",
    "\n",
    "Pie_base()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 被引量+期刊"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 614,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_quote = dfC\n",
    "\n",
    "df_quote[\"被引\"] = df_quote[\"被引\"].astype('float')\n",
    "# df_quote.sort_values(by=\"被引\")\n",
    "df_quote.groupby('来源',as_index=False).sum()\n",
    "\n",
    "df_data_quote = df_data_quote.sort_values(by=\"被引\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 628,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[550.0,\n",
       " 204.0,\n",
       " 165.0,\n",
       " 158.0,\n",
       " 132.0,\n",
       " 113.0,\n",
       " 112.0,\n",
       " 78.0,\n",
       " 75.0,\n",
       " 71.0,\n",
       " 66.0,\n",
       " 62.0,\n",
       " 58.0,\n",
       " 56.0,\n",
       " 56.0,\n",
       " 55.0,\n",
       " 55.0,\n",
       " 51.0,\n",
       " 40.0,\n",
       " 37.0,\n",
       " 29.0,\n",
       " 26.0,\n",
       " 23.0,\n",
       " 19.0,\n",
       " 19.0,\n",
       " 19.0,\n",
       " 14.0,\n",
       " 14.0,\n",
       " 14.0,\n",
       " 14.0,\n",
       " 12.0,\n",
       " 9.0,\n",
       " 8.0,\n",
       " 8.0,\n",
       " 7.0,\n",
       " 6.0,\n",
       " 6.0,\n",
       " 6.0,\n",
       " 4.0,\n",
       " 4.0,\n",
       " 4.0,\n",
       " 4.0,\n",
       " 4.0,\n",
       " 4.0,\n",
       " 3.0,\n",
       " 3.0,\n",
       " 3.0,\n",
       " 3.0,\n",
       " 1.0,\n",
       " 1.0,\n",
       " 1.0,\n",
       " 1.0,\n",
       " 1.0,\n",
       " 1.0,\n",
       " 0.0,\n",
       " 0.0,\n",
       " 0.0,\n",
       " 0.0]"
      ]
     },
     "execution_count": 628,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df_quote.sort_values(by=\"被引\")\n",
    "df_data_quote[\"来源\"].values.tolist()\n",
    "df_data_quote[\"被引\"].values.tolist()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 下载期刊"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 616,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_subject = dfC\n",
    "\n",
    "df_subject[\"下载\"] = df_subject[\"下载\"].astype('float')\n",
    "# df_quote.sort_values(by=\"被引\")\n",
    "df_subject.groupby('来源',as_index=False).sum()\n",
    "\n",
    "df_data_subject = df_data_subject.sort_values(by=\"下载\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 617,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>来源</th>\n",
       "      <th>index</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>新闻与写作</td>\n",
       "      <td>9298</td>\n",
       "      <td>550.0</td>\n",
       "      <td>44670.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>现代传播(中国传媒大学学报)</td>\n",
       "      <td>1992</td>\n",
       "      <td>204.0</td>\n",
       "      <td>19625.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>传媒</td>\n",
       "      <td>11129</td>\n",
       "      <td>165.0</td>\n",
       "      <td>16153.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>当代传播</td>\n",
       "      <td>1794</td>\n",
       "      <td>56.0</td>\n",
       "      <td>13656.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>青年记者</td>\n",
       "      <td>9470</td>\n",
       "      <td>78.0</td>\n",
       "      <td>11167.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>出版广角</td>\n",
       "      <td>2498</td>\n",
       "      <td>132.0</td>\n",
       "      <td>8165.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>中国广播电视学刊</td>\n",
       "      <td>5581</td>\n",
       "      <td>113.0</td>\n",
       "      <td>7762.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>编辑之友</td>\n",
       "      <td>726</td>\n",
       "      <td>66.0</td>\n",
       "      <td>6656.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>电视研究</td>\n",
       "      <td>2335</td>\n",
       "      <td>51.0</td>\n",
       "      <td>5478.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>新闻界</td>\n",
       "      <td>714</td>\n",
       "      <td>158.0</td>\n",
       "      <td>5137.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>新闻记者</td>\n",
       "      <td>557</td>\n",
       "      <td>112.0</td>\n",
       "      <td>5071.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>出版发行研究</td>\n",
       "      <td>522</td>\n",
       "      <td>71.0</td>\n",
       "      <td>4825.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>中国出版</td>\n",
       "      <td>1474</td>\n",
       "      <td>75.0</td>\n",
       "      <td>4756.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>新闻战线</td>\n",
       "      <td>5006</td>\n",
       "      <td>55.0</td>\n",
       "      <td>4276.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>新闻大学</td>\n",
       "      <td>304</td>\n",
       "      <td>55.0</td>\n",
       "      <td>3245.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>国际新闻界</td>\n",
       "      <td>361</td>\n",
       "      <td>26.0</td>\n",
       "      <td>2993.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>中国编辑</td>\n",
       "      <td>908</td>\n",
       "      <td>19.0</td>\n",
       "      <td>2873.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>现代经济探讨</td>\n",
       "      <td>319</td>\n",
       "      <td>58.0</td>\n",
       "      <td>2288.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>中国科技期刊研究</td>\n",
       "      <td>675</td>\n",
       "      <td>62.0</td>\n",
       "      <td>2110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>现代出版</td>\n",
       "      <td>344</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2108.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>科技与出版</td>\n",
       "      <td>885</td>\n",
       "      <td>40.0</td>\n",
       "      <td>2022.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>人民论坛·学术前沿</td>\n",
       "      <td>458</td>\n",
       "      <td>29.0</td>\n",
       "      <td>1726.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>当代电视</td>\n",
       "      <td>862</td>\n",
       "      <td>19.0</td>\n",
       "      <td>1698.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>重庆社会科学</td>\n",
       "      <td>362</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1555.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>中国记者</td>\n",
       "      <td>2054</td>\n",
       "      <td>56.0</td>\n",
       "      <td>1365.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>思想理论教育导刊</td>\n",
       "      <td>118</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1265.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>学习与实践</td>\n",
       "      <td>256</td>\n",
       "      <td>14.0</td>\n",
       "      <td>1226.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>中国电视</td>\n",
       "      <td>396</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1182.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>中国行政管理</td>\n",
       "      <td>115</td>\n",
       "      <td>7.0</td>\n",
       "      <td>998.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>思想政治教育研究</td>\n",
       "      <td>165</td>\n",
       "      <td>14.0</td>\n",
       "      <td>942.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>新闻爱好者</td>\n",
       "      <td>733</td>\n",
       "      <td>9.0</td>\n",
       "      <td>901.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>学校党建与思想教育</td>\n",
       "      <td>51</td>\n",
       "      <td>1.0</td>\n",
       "      <td>733.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>江淮论坛</td>\n",
       "      <td>295</td>\n",
       "      <td>37.0</td>\n",
       "      <td>680.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>电视技术</td>\n",
       "      <td>2446</td>\n",
       "      <td>23.0</td>\n",
       "      <td>680.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>山西档案</td>\n",
       "      <td>629</td>\n",
       "      <td>19.0</td>\n",
       "      <td>577.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>编辑学报</td>\n",
       "      <td>157</td>\n",
       "      <td>6.0</td>\n",
       "      <td>523.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>河南大学学报(社会科学版)</td>\n",
       "      <td>44</td>\n",
       "      <td>1.0</td>\n",
       "      <td>511.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>江苏社会科学</td>\n",
       "      <td>316</td>\n",
       "      <td>4.0</td>\n",
       "      <td>465.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>东南学术</td>\n",
       "      <td>325</td>\n",
       "      <td>14.0</td>\n",
       "      <td>438.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>南京社会科学</td>\n",
       "      <td>119</td>\n",
       "      <td>4.0</td>\n",
       "      <td>418.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>上海大学学报(社会科学版)</td>\n",
       "      <td>66</td>\n",
       "      <td>4.0</td>\n",
       "      <td>411.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>甘肃社会科学</td>\n",
       "      <td>273</td>\n",
       "      <td>4.0</td>\n",
       "      <td>389.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>郑州大学学报(哲学社会科学版)</td>\n",
       "      <td>22</td>\n",
       "      <td>0.0</td>\n",
       "      <td>333.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>西南民族大学学报(人文社科版)</td>\n",
       "      <td>254</td>\n",
       "      <td>14.0</td>\n",
       "      <td>325.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>企业经济</td>\n",
       "      <td>227</td>\n",
       "      <td>3.0</td>\n",
       "      <td>297.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>行政管理改革</td>\n",
       "      <td>88</td>\n",
       "      <td>1.0</td>\n",
       "      <td>286.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>中州学刊</td>\n",
       "      <td>222</td>\n",
       "      <td>8.0</td>\n",
       "      <td>277.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>人民论坛</td>\n",
       "      <td>58</td>\n",
       "      <td>1.0</td>\n",
       "      <td>270.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>出版科学</td>\n",
       "      <td>35</td>\n",
       "      <td>0.0</td>\n",
       "      <td>263.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>社会科学家</td>\n",
       "      <td>310</td>\n",
       "      <td>8.0</td>\n",
       "      <td>259.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>开放导报</td>\n",
       "      <td>339</td>\n",
       "      <td>1.0</td>\n",
       "      <td>255.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>中国远程教育</td>\n",
       "      <td>372</td>\n",
       "      <td>3.0</td>\n",
       "      <td>240.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>理论视野</td>\n",
       "      <td>37</td>\n",
       "      <td>0.0</td>\n",
       "      <td>238.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>湖南社会科学</td>\n",
       "      <td>168</td>\n",
       "      <td>3.0</td>\n",
       "      <td>191.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>浙江师范大学学报(社会科学版)</td>\n",
       "      <td>293</td>\n",
       "      <td>6.0</td>\n",
       "      <td>172.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>科技通报</td>\n",
       "      <td>234</td>\n",
       "      <td>0.0</td>\n",
       "      <td>117.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>中国统计</td>\n",
       "      <td>298</td>\n",
       "      <td>3.0</td>\n",
       "      <td>115.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>海南大学学报(人文社会科学版)</td>\n",
       "      <td>36</td>\n",
       "      <td>1.0</td>\n",
       "      <td>105.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 来源  index     被引       下载\n",
       "29            新闻与写作   9298  550.0  44670.0\n",
       "41   现代传播(中国传媒大学学报)   1992  204.0  19625.0\n",
       "15               传媒  11129  165.0  16153.0\n",
       "25             当代传播   1794   56.0  13656.0\n",
       "57             青年记者   9470   78.0  11167.0\n",
       "17             出版广角   2498  132.0   8165.0\n",
       "3          中国广播电视学刊   5581  113.0   7762.0\n",
       "51             编辑之友    726   66.0   6656.0\n",
       "47             电视研究   2335   51.0   5478.0\n",
       "33              新闻界    714  158.0   5137.0\n",
       "34             新闻记者    557  112.0   5071.0\n",
       "16           出版发行研究    522   71.0   4825.0\n",
       "2              中国出版   1474   75.0   4756.0\n",
       "31             新闻战线   5006   55.0   4276.0\n",
       "30             新闻大学    304   55.0   3245.0\n",
       "20            国际新闻界    361   26.0   2993.0\n",
       "7              中国编辑    908   19.0   2873.0\n",
       "43           现代经济探讨    319   58.0   2288.0\n",
       "5          中国科技期刊研究    675   62.0   2110.0\n",
       "42             现代出版    344    4.0   2108.0\n",
       "49            科技与出版    885   40.0   2022.0\n",
       "13        人民论坛·学术前沿    458   29.0   1726.0\n",
       "26             当代电视    862   19.0   1698.0\n",
       "56           重庆社会科学    362   12.0   1555.0\n",
       "9              中国记者   2054   56.0   1365.0\n",
       "28         思想理论教育导刊    118    6.0   1265.0\n",
       "21            学习与实践    256   14.0   1226.0\n",
       "4              中国电视    396    4.0   1182.0\n",
       "8            中国行政管理    115    7.0    998.0\n",
       "27         思想政治教育研究    165   14.0    942.0\n",
       "32            新闻爱好者    733    9.0    901.0\n",
       "22        学校党建与思想教育     51    1.0    733.0\n",
       "35             江淮论坛    295   37.0    680.0\n",
       "46             电视技术   2446   23.0    680.0\n",
       "23             山西档案    629   19.0    577.0\n",
       "52             编辑学报    157    6.0    523.0\n",
       "37    河南大学学报(社会科学版)     44    1.0    511.0\n",
       "36           江苏社会科学    316    4.0    465.0\n",
       "1              东南学术    325   14.0    438.0\n",
       "19           南京社会科学    119    4.0    418.0\n",
       "0     上海大学学报(社会科学版)     66    4.0    411.0\n",
       "45           甘肃社会科学    273    4.0    389.0\n",
       "55  郑州大学学报(哲学社会科学版)     22    0.0    333.0\n",
       "54  西南民族大学学报(人文社科版)    254   14.0    325.0\n",
       "14             企业经济    227    3.0    297.0\n",
       "53           行政管理改革     88    1.0    286.0\n",
       "11             中州学刊    222    8.0    277.0\n",
       "12             人民论坛     58    1.0    270.0\n",
       "18             出版科学     35    0.0    263.0\n",
       "48            社会科学家    310    8.0    259.0\n",
       "24             开放导报    339    1.0    255.0\n",
       "10           中国远程教育    372    3.0    240.0\n",
       "44             理论视野     37    0.0    238.0\n",
       "40           湖南社会科学    168    3.0    191.0\n",
       "38  浙江师范大学学报(社会科学版)    293    6.0    172.0\n",
       "50             科技通报    234    0.0    117.0\n",
       "6              中国统计    298    3.0    115.0\n",
       "39  海南大学学报(人文社会科学版)     36    1.0    105.0"
      ]
     },
     "execution_count": 617,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_data_subject"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 合并pyecharts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 700,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pyecharts.charts.basic_charts.bar.Bar at 0x1cbf0f9fd90>"
      ]
     },
     "execution_count": 700,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Bar, Grid, Line, Liquid, Page, Pie\n",
    "from pyecharts.commons.utils import JsCode\n",
    "from pyecharts.components import Table\n",
    "from pyecharts.faker import Faker\n",
    "\n",
    "\n",
    "############################################################\n",
    "\n",
    "\n",
    "\n",
    "df_to_source = data\n",
    "df_source  = df_to_source[\"来源\"].value_counts()\n",
    "a = df_source.index\n",
    "type_source = a.tolist()\n",
    "count_source = df_source.tolist()\n",
    "\n",
    "source_rate=[]\n",
    "for i in range(0,len(count_source)):\n",
    "    j = str(count_source[i]/374)+'%'\n",
    "    source_rate.append(j)\n",
    "\n",
    "\n",
    "#-------------------------------------------------------\n",
    "def Bar1():\n",
    "    c = (\n",
    "    Bar()\n",
    "    .add_xaxis(\n",
    "        type_source[0:10]\n",
    "    )\n",
    "    .add_yaxis(\"期刊名称\", count_source[0:10])\n",
    "    .set_global_opts(\n",
    "        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),\n",
    "        title_opts=opts.TitleOpts(title=\"发表文献篇数Top10的期刊\"),\n",
    "    )\n",
    "    .set_series_opts(\n",
    "        label_opts=opts.LabelOpts(is_show=False),\n",
    "        markpoint_opts=opts.MarkPointOpts(\n",
    "            data=[\n",
    "                opts.MarkPointItem(type_=\"max\", name=\"最大值\"),\n",
    "                opts.MarkPointItem(type_=\"min\", name=\"最小值\"),\n",
    "                opts.MarkPointItem(type_=\"average\", name=\"平均值\"),\n",
    "            ]\n",
    "        ),\n",
    "    )\n",
    "    \n",
    ")\n",
    "    return c\n",
    "############################################################\n",
    "df_quote = dfC\n",
    "\n",
    "df_quote[\"被引\"] = df_quote[\"被引\"].astype('float')\n",
    "# df_quote.sort_values(by=\"被引\")\n",
    "df_quote.groupby('来源',as_index=False).sum()\n",
    "\n",
    "df_data_quote = df_data_quote.sort_values(by=\"被引\", ascending=False)\n",
    "\n",
    "type_source_quote = df_data_quote[\"来源\"].values.tolist()\n",
    "acount_source_quote = df_data_quote[\"被引\"].values.tolist()\n",
    "#-------------------------------------------------------\n",
    "def Bar2():\n",
    "    c = (\n",
    "    Bar()\n",
    "    .add_xaxis(\n",
    "        type_source_quote[0:10]\n",
    "    )\n",
    "    .add_yaxis(\"期刊名称\",acount_source_quote[0:10])\n",
    "    .set_global_opts(\n",
    "        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),\n",
    "        title_opts=opts.TitleOpts(title=\"文献被引数Top10的期刊\"),\n",
    "    )\n",
    "    \n",
    ")\n",
    "    return c\n",
    "###########################################################\n",
    "df_subject = dfC\n",
    "\n",
    "df_subject[\"下载\"] = df_subject[\"下载\"].astype('float')\n",
    "# df_quote.sort_values(by=\"被引\")\n",
    "df_subject.groupby('来源',as_index=False).sum()\n",
    "\n",
    "df_data_subject = df_data_subject.sort_values(by=\"下载\", ascending=False)\n",
    "\n",
    "type_source_subject = df_data_subject[\"来源\"].values.tolist()\n",
    "acount_source_subject = df_data_subject[\"下载\"].values.tolist()\n",
    "#-------------------------------------------------------\n",
    "def Bar3():\n",
    "    c = (\n",
    "    Bar()\n",
    "    .add_xaxis(\n",
    "        type_source_subject[0:10]\n",
    "    )\n",
    "    .add_yaxis(\"期刊名称\",acount_source_subject[0:10])\n",
    "    .set_global_opts(\n",
    "        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),\n",
    "        title_opts=opts.TitleOpts(title=\"文献下载量Top10的期刊\"),\n",
    "    )\n",
    "    \n",
    ")\n",
    "    return c\n",
    "#########################################################\n",
    "def page_simple_layout():\n",
    "    page = Page(layout=Page.SimplePageLayout)\n",
    "    page.add(\n",
    "        Bar1(),\n",
    "        Bar2(),\n",
    "        Bar3(),\n",
    "\n",
    "    )\n",
    "    page.render(\"page_simple_layout.html\")\n",
    "\n",
    "    \n",
    "    \n",
    "Bar1()\n",
    "Bar2()\n",
    "Bar3()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 组织或单位分析"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 数据准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_to_AD = dfC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "      <th>A1</th>\n",
       "      <th>AD</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>122</td>\n",
       "      <td>卢迪;邱子欣;</td>\n",
       "      <td>中国传媒大学新媒体研究院;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>314</td>\n",
       "      <td>金莉萍;</td>\n",
       "      <td>南京报业集团;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>162</td>\n",
       "      <td>文铭权;李朗;</td>\n",
       "      <td>四川日报报业集团驻眉山办事处;四川理工学院人文学院;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>208</td>\n",
       "      <td>白立华;刘永坚;施其明;</td>\n",
       "      <td>武汉理工数字传播工程有限公司;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>111</td>\n",
       "      <td>徐世平;</td>\n",
       "      <td>东方网;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>363</td>\n",
       "      <td>郭全中;</td>\n",
       "      <td>国家行政学院社会和文化教研部;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>353</td>\n",
       "      <td>孟威;</td>\n",
       "      <td>中国社会科学院;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>371</th>\n",
       "      <td>361</td>\n",
       "      <td>付玉辉;</td>\n",
       "      <td>中国联通集团综合部;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>372</th>\n",
       "      <td>358</td>\n",
       "      <td>孟伟;</td>\n",
       "      <td>中国传媒大学传播研究院欧洲传媒研究中心;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373</th>\n",
       "      <td>271</td>\n",
       "      <td>吴信训;储靖伦;</td>\n",
       "      <td>上海大学;上海大学上海市社会科学创新研究基地;上海大学上海电影学院;</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>374 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      序号            A1                                  AD\n",
       "0    122       卢迪;邱子欣;                       中国传媒大学新媒体研究院;\n",
       "1    314          金莉萍;                             南京报业集团;\n",
       "2    162       文铭权;李朗;          四川日报报业集团驻眉山办事处;四川理工学院人文学院;\n",
       "3    208  白立华;刘永坚;施其明;                     武汉理工数字传播工程有限公司;\n",
       "4    111          徐世平;                                东方网;\n",
       "..   ...           ...                                 ...\n",
       "369  363          郭全中;                     国家行政学院社会和文化教研部;\n",
       "370  353           孟威;                            中国社会科学院;\n",
       "371  361          付玉辉;                          中国联通集团综合部;\n",
       "372  358           孟伟;                中国传媒大学传播研究院欧洲传媒研究中心;\n",
       "373  271      吴信训;储靖伦;  上海大学;上海大学上海市社会科学创新研究基地;上海大学上海电影学院;\n",
       "\n",
       "[374 rows x 3 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df_AD_准备 = df_to_AD[[\"A1\",\"AD\"]]\n",
    "df_AD_准备 =df_to_AD[[\"序号\",\"A1\",\"AD\"]]\n",
    "df_AD_准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0                             [中国传媒大学新媒体研究院, ]\n",
       "1                                   [南京报业集团, ]\n",
       "2               [四川日报报业集团驻眉山办事处, 四川理工学院人文学院, ]\n",
       "3                           [武汉理工数字传播工程有限公司, ]\n",
       "4                                      [东方网, ]\n",
       "                        ...                   \n",
       "369                         [国家行政学院社会和文化教研部, ]\n",
       "370                                [中国社会科学院, ]\n",
       "371                              [中国联通集团综合部, ]\n",
       "372                    [中国传媒大学传播研究院欧洲传媒研究中心, ]\n",
       "373    [上海大学, 上海大学上海市社会科学创新研究基地, 上海大学上海电影学院, ]\n",
       "Name: AD, Length: 374, dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_to_AD[\"AD\"].str.split(';')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "      <th>A1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>122</td>\n",
       "      <td>卢迪;邱子欣;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>314</td>\n",
       "      <td>金莉萍;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>162</td>\n",
       "      <td>文铭权;李朗;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>208</td>\n",
       "      <td>白立华;刘永坚;施其明;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>111</td>\n",
       "      <td>徐世平;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>363</td>\n",
       "      <td>郭全中;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>353</td>\n",
       "      <td>孟威;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>371</th>\n",
       "      <td>361</td>\n",
       "      <td>付玉辉;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>372</th>\n",
       "      <td>358</td>\n",
       "      <td>孟伟;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373</th>\n",
       "      <td>271</td>\n",
       "      <td>吴信训;储靖伦;</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>374 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      序号            A1\n",
       "0    122       卢迪;邱子欣;\n",
       "1    314          金莉萍;\n",
       "2    162       文铭权;李朗;\n",
       "3    208  白立华;刘永坚;施其明;\n",
       "4    111          徐世平;\n",
       "..   ...           ...\n",
       "369  363          郭全中;\n",
       "370  353           孟威;\n",
       "371  361          付玉辉;\n",
       "372  358           孟伟;\n",
       "373  271      吴信训;储靖伦;\n",
       "\n",
       "[374 rows x 2 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_待合并 =  df_AD_准备.drop('AD', axis=1)\n",
    "df_待合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_准备合并的列 = df_to_AD['AD'].str.split(';', expand=True).stack().reset_index(level=1,drop=True).rename('AD')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "      <th>所属单位</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>中国传媒大学新媒体研究院</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>南京报业集团</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>四川日报报业集团驻眉山办事处</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>四川理工学院人文学院</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>3</td>\n",
       "      <td>武汉理工数字传播工程有限公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>3</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>4</td>\n",
       "      <td>东方网</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>4</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>5</td>\n",
       "      <td>中国新闻出版研究院</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>5</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>6</td>\n",
       "      <td>北大方正电子有限公司解决方案中心</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>6</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>7</td>\n",
       "      <td>国家行政学院社会和文化教研部</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>7</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>8</td>\n",
       "      <td>媒体融合与传播国家重点实验室[中国传媒大学]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>8</td>\n",
       "      <td>中国传媒大学</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>8</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    序号                    所属单位\n",
       "0    0            中国传媒大学新媒体研究院\n",
       "1    0                        \n",
       "2    1                  南京报业集团\n",
       "3    1                        \n",
       "4    2          四川日报报业集团驻眉山办事处\n",
       "5    2              四川理工学院人文学院\n",
       "6    2                        \n",
       "7    3          武汉理工数字传播工程有限公司\n",
       "8    3                        \n",
       "9    4                     东方网\n",
       "10   4                        \n",
       "11   5               中国新闻出版研究院\n",
       "12   5                        \n",
       "13   6        北大方正电子有限公司解决方案中心\n",
       "14   6                        \n",
       "15   7          国家行政学院社会和文化教研部\n",
       "16   7                        \n",
       "17   8  媒体融合与传播国家重点实验室[中国传媒大学]\n",
       "18   8                  中国传媒大学\n",
       "19   8                        "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dict_准备合并的列 = {\"序号\":df_准备合并的列.index,\"所属单位\":df_准备合并的列.values}\n",
    "df_准备合并列=pd.DataFrame(dict_准备合并的列)\n",
    "# a = df_准备合并列.dropna(axis=0,subset = [\"所属单位\"])\n",
    "df_准备合并列.head(20)\n",
    "# a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "null = []\n",
    "for i in range(0,804):\n",
    "#     print(i)\n",
    "    if df_准备合并列[\"所属单位\"][i] == '':\n",
    "        null.append(i)\n",
    "        new_df = df_准备合并列.drop(null)\n",
    "    i = i+1\n",
    "#     print(\"+\",i)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "      <th>所属单位</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>中国传媒大学新媒体研究院</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>南京报业集团</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>四川日报报业集团驻眉山办事处</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>四川理工学院人文学院</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>3</td>\n",
       "      <td>武汉理工数字传播工程有限公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>796</th>\n",
       "      <td>371</td>\n",
       "      <td>中国联通集团综合部</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>798</th>\n",
       "      <td>372</td>\n",
       "      <td>中国传媒大学传播研究院欧洲传媒研究中心</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>800</th>\n",
       "      <td>373</td>\n",
       "      <td>上海大学</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>801</th>\n",
       "      <td>373</td>\n",
       "      <td>上海大学上海市社会科学创新研究基地</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>802</th>\n",
       "      <td>373</td>\n",
       "      <td>上海大学上海电影学院</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>466 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      序号                 所属单位\n",
       "0      0         中国传媒大学新媒体研究院\n",
       "2      1               南京报业集团\n",
       "4      2       四川日报报业集团驻眉山办事处\n",
       "5      2           四川理工学院人文学院\n",
       "7      3       武汉理工数字传播工程有限公司\n",
       "..   ...                  ...\n",
       "796  371            中国联通集团综合部\n",
       "798  372  中国传媒大学传播研究院欧洲传媒研究中心\n",
       "800  373                 上海大学\n",
       "801  373    上海大学上海市社会科学创新研究基地\n",
       "802  373           上海大学上海电影学院\n",
       "\n",
       "[466 rows x 2 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "中国人民大学新闻学院         12\n",
       "中国社会科学院新闻与传播研究所    10\n",
       "中国传媒大学新媒体研究院        9\n",
       "中国传媒大学              8\n",
       "清华大学新闻与传播学院         6\n",
       "                   ..\n",
       "南京报业传媒集团            1\n",
       "中国广播电影电视社会组织联合会     1\n",
       "贵州出版集团数字出版中心        1\n",
       "武汉工程大学法商学院          1\n",
       "北京时间新闻媒体有限公司        1\n",
       "Name: 所属单位, Length: 349, dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_place = new_df[\"所属单位\"].value_counts()\n",
    "df_place"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = df_place.index\n",
    "type_place = a.tolist()\n",
    "count_place = df_place.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_forWordcloud = []\n",
    "for i in range(0,len(type_place)):\n",
    "    a = (type_place[i],count_place[i])\n",
    "    data_forWordcloud.append(a)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### pyecharts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\zjl\\\\Desktop\\\\Python数据分析\\\\期末项目\\\\AI_BD_媒介融合\\\\wordcloud_diamond.html'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import WordCloud\n",
    "from pyecharts.globals import SymbolType\n",
    "\n",
    "(\n",
    "    WordCloud()\n",
    "    .add(\"\", data_forWordcloud,shape=SymbolType.DIAMOND)\n",
    "    .set_global_opts(title_opts=opts.TitleOpts(title=\"WordCloud-shape-diamond\"))\n",
    "    .render(\"wordcloud_diamond.html\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "type_place.reverse()\n",
    "count_place.reverse()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\zjl\\\\Desktop\\\\Python数据分析\\\\期末项目\\\\AI_BD_媒介融合\\\\pictorialbar_base.html'"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import PictorialBar\n",
    "from pyecharts.globals import SymbolType\n",
    "\n",
    "(\n",
    "    PictorialBar(init_opts=opts.InitOpts(width=\"700px\"))\n",
    "    .add_xaxis(type_place)\n",
    "    .add_yaxis(\n",
    "        \"\",\n",
    "        count_place,\n",
    "        label_opts=opts.LabelOpts(is_show=False),\n",
    "        symbol_size=10,\n",
    "        symbol_repeat=\"fixed\",\n",
    "        symbol_offset=[0,0],\n",
    "        symbol_margin=2,\n",
    "        category_gap=\"30%\",\n",
    "        is_symbol_clip=True,\n",
    "        symbol=SymbolType.ROUND_RECT,\n",
    "    )\n",
    "    .reversal_axis()\n",
    "    .set_global_opts(\n",
    "        title_opts=opts.TitleOpts(title=\"各单位发表文献情况\"),\n",
    "        xaxis_opts=opts.AxisOpts(is_show=False),\n",
    "        yaxis_opts=opts.AxisOpts(\n",
    "            axistick_opts=opts.AxisTickOpts(is_show=False),\n",
    "            axisline_opts=opts.AxisLineOpts(\n",
    "                linestyle_opts=opts.LineStyleOpts(opacity=0)\n",
    "            ),\n",
    "        ),\n",
    "        legend_opts=opts.LegendOpts(type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"),\n",
    "    )\n",
    "    .set_series_opts(\n",
    "        label_opts=opts.LabelOpts(is_show=False),\n",
    "        markpoint_opts=opts.MarkPointOpts(\n",
    "            data=[\n",
    "                opts.MarkPointItem(type_=\"max\", name=\"最大值\"),\n",
    "            ]\n",
    "        ),\n",
    "    )\n",
    "    .render(\"pictorialbar_base.html\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Page,PictorialBar,WordCloud\n",
    "from pyecharts.commons.utils import JsCode\n",
    "from pyecharts.components import Table\n",
    "\n",
    "\n",
    "def to_WordCloud():\n",
    "    c = (\n",
    "    WordCloud()\n",
    "    .add(\"\", data_forWordcloud,shape=SymbolType.DIAMOND)\n",
    "    .set_global_opts(title_opts=opts.TitleOpts(title=\"词云图——各单位发表文献情况\"))\n",
    ")\n",
    "\n",
    "    return c\n",
    "\n",
    "######################################################\n",
    "def to_PictorialBar():\n",
    "    c = (\n",
    "    PictorialBar()\n",
    "    .add_xaxis(type_place)\n",
    "    .add_yaxis(\n",
    "        \"\",\n",
    "        count_place,\n",
    "        label_opts=opts.LabelOpts(is_show=False),\n",
    "        symbol_size=10,\n",
    "        symbol_repeat=\"fixed\",\n",
    "        symbol_offset=[0,0],\n",
    "        symbol_margin=2,\n",
    "        category_gap=\"30%\",\n",
    "        is_symbol_clip=True,\n",
    "        symbol=SymbolType.ROUND_RECT,\n",
    "    )\n",
    "    .reversal_axis()\n",
    "    .set_global_opts(\n",
    "        title_opts=opts.TitleOpts(title=\"象形柱状图——各单位发表文献情况\"),\n",
    "        xaxis_opts=opts.AxisOpts(is_show=False),\n",
    "        yaxis_opts=opts.AxisOpts(\n",
    "            axistick_opts=opts.AxisTickOpts(is_show=False),\n",
    "            axisline_opts=opts.AxisLineOpts(\n",
    "                linestyle_opts=opts.LineStyleOpts(opacity=0)\n",
    "            ),\n",
    "        ),\n",
    "        legend_opts=opts.LegendOpts(type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"),\n",
    "    )\n",
    "    .set_series_opts(\n",
    "        label_opts=opts.LabelOpts(is_show=False),\n",
    "        markpoint_opts=opts.MarkPointOpts(\n",
    "            data=[\n",
    "                opts.MarkPointItem(type_=\"max\", name=\"最大值\"),\n",
    "            ]\n",
    "        ),\n",
    "    )\n",
    ")\n",
    "\n",
    "    return c\n",
    "\n",
    "\n",
    "def page3():\n",
    "    page = Page(layout=Page.SimplePageLayout)\n",
    "    page.add(\n",
    "        to_WordCloud(),\n",
    "        to_PictorialBar(),\n",
    "        \n",
    "        \n",
    "    )\n",
    "    page.render(\"page3.html\")\n",
    "\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "to_PictorialBar()\n",
    "to_WordCloud()\n",
    "page3()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### VOS可视化\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### pyecharts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts.components import Image\n",
    "from pyecharts.options import ComponentTitleOpts\n",
    "\n",
    "\n",
    "def image_mi10_feature():\n",
    "    c =( \n",
    "    Image()\n",
    "   .add(\n",
    "        src=\"VOS.png\",\n",
    "        style_opts={\"width\": \"700px\", \"height\": \"500px\", \"top\": \"1165px\", \"left\": \"47px\", \"style\": \"margin-right: 20px\"},)\n",
    "\n",
    "    .set_global_opts(\n",
    "        title_opts=ComponentTitleOpts(title=\"CNKI关键词共现\", subtitle=\"基于LDA模型分析\")\n",
    "    )\n",
    "    )\n",
    "    return c\n",
    "\n",
    "def image_honor_feature():\n",
    "    c = (  \n",
    "    Image()\n",
    "# 如果图片url是本地链接，需要在url.py文件中加一句话\n",
    "\n",
    "\n",
    "    .add(\n",
    "        src=\"author.png\",\n",
    "        style_opts={\"width\": \"700px\", \"height\": \"500px\", \"top\": \"1165px\", \"right\": \"50px\", \"style\": \"margin-left: 20px\"},\n",
    "    )\n",
    "    .set_global_opts(\n",
    "        title_opts=ComponentTitleOpts(title=\"CNKI作者网络\", subtitle=\"基于LDA模型分析\"),\n",
    "    )\n",
    "    )\n",
    "    return c\n",
    "\n",
    "def page4():\n",
    "    page = Page(layout=Page.SimplePageLayout)\n",
    "    page.add(\n",
    "        image_mi10_feature(),\n",
    "        image_honor_feature(),\n",
    "        \n",
    "        \n",
    "    )\n",
    "    page.render(\"page4.html\")\n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "image_mi10_feature()\n",
    "image_honor_feature()\n",
    "page4()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "414.286px",
    "left": "985.196px",
    "top": "141.714px",
    "width": "164.988px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
